diff --git a/404.html b/404.html index 882ad8f43..97a455efa 100644 --- a/404.html +++ b/404.html @@ -31,7 +31,7 @@ xcms - 4.1.4 + 4.1.6 diff --git a/LICENSE-text.html b/LICENSE-text.html index 8ac54bf24..2f6b12d7c 100644 --- a/LICENSE-text.html +++ b/LICENSE-text.html @@ -10,7 +10,7 @@ xcms - 4.1.4 + 4.1.6 diff --git a/apple-touch-icon-120x120.png b/apple-touch-icon-120x120.png index 2851b4f93..9c84456e9 100644 Binary files a/apple-touch-icon-120x120.png and b/apple-touch-icon-120x120.png differ diff --git a/apple-touch-icon-152x152.png b/apple-touch-icon-152x152.png index 2cfdd3746..11cbd3bc1 100644 Binary files a/apple-touch-icon-152x152.png and b/apple-touch-icon-152x152.png differ diff --git a/apple-touch-icon-180x180.png b/apple-touch-icon-180x180.png index 725ce5d17..5c3bcfc99 100644 Binary files a/apple-touch-icon-180x180.png and b/apple-touch-icon-180x180.png differ diff --git a/apple-touch-icon-60x60.png b/apple-touch-icon-60x60.png index 167f7abf4..0af9c28ac 100644 Binary files a/apple-touch-icon-60x60.png and b/apple-touch-icon-60x60.png differ diff --git a/apple-touch-icon-76x76.png b/apple-touch-icon-76x76.png index 2cc49599e..6d3ed54a9 100644 Binary files a/apple-touch-icon-76x76.png and b/apple-touch-icon-76x76.png differ diff --git a/apple-touch-icon.png b/apple-touch-icon.png index 072eef3da..51d348ef9 100644 Binary files a/apple-touch-icon.png and b/apple-touch-icon.png differ diff --git a/articles/LC-MS-feature-grouping.html b/articles/LC-MS-feature-grouping.html index 8f41da717..18ecbb1c0 100644 --- a/articles/LC-MS-feature-grouping.html +++ b/articles/LC-MS-feature-grouping.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:44:39 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:08:00 2024 Introduction 
@@ -816,7 +816,7 @@ Session information sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -841,67 +841,68 @@ Session information## ## other attached packages: ## [1] pheatmap_1.0.12 faahKO_1.43.0 MsFeatures_1.11.0 -## [4] xcms_4.1.4 MSnbase_2.29.1 ProtGenerics_1.35.0 -## [7] S4Vectors_0.41.2 mzR_2.37.0 Rcpp_1.0.11 +## [4] xcms_4.1.6 MSnbase_2.29.3 ProtGenerics_1.35.2 +## [7] S4Vectors_0.41.3 mzR_2.37.0 Rcpp_1.0.12 ## [10] Biobase_2.63.0 BiocGenerics_0.49.1 BiocParallel_1.37.0 ## [13] BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] MetaboCoreUtils_1.11.0 pkgconfig_2.0.3 -## [13] crayon_1.5.2 fastmap_1.1.1 -## [15] XVector_0.43.0 utf8_1.2.4 -## [17] rmarkdown_2.25 preprocessCore_1.65.0 -## [19] ragg_1.2.6 purrr_1.0.2 -## [21] MultiAssayExperiment_1.29.0 xfun_0.41 -## [23] zlibbioc_1.49.0 cachem_1.0.8 -## [25] GenomeInfoDb_1.39.1 jsonlite_1.8.8 -## [27] progress_1.2.3 highr_0.10 -## [29] DelayedArray_0.29.0 prettyunits_1.2.0 -## [31] parallel_4.4.0 cluster_2.1.6 -## [33] R6_2.5.1 bslib_0.6.1 -## [35] stringi_1.8.2 RColorBrewer_1.1-3 -## [37] limma_3.59.1 GenomicRanges_1.55.1 -## [39] jquerylib_0.1.4 bookdown_0.37 -## [41] SummarizedExperiment_1.33.1 iterators_1.0.14 -## [43] knitr_1.45 IRanges_2.37.0 -## [45] igraph_1.5.1 splines_4.4.0 -## [47] Matrix_1.6-4 tidyselect_1.2.0 -## [49] abind_1.4-5 yaml_2.3.7 -## [51] doParallel_1.0.17 codetools_0.2-19 -## [53] affy_1.81.0 lattice_0.22-5 -## [55] tibble_3.2.1 plyr_1.8.9 -## [57] evaluate_0.23 survival_3.5-7 -## [59] desc_1.4.2 Spectra_1.13.2 -## [61] pillar_1.9.0 affyio_1.73.0 -## [63] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [65] foreach_1.5.2 
MALDIquant_1.22.1 -## [67] ncdf4_1.22 generics_0.1.3 -## [69] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 MetaboCoreUtils_1.11.2 +## [13] pkgconfig_2.0.3 crayon_1.5.2 +## [15] fastmap_1.1.1 XVector_0.43.1 +## [17] utf8_1.2.4 rmarkdown_2.25 +## [19] preprocessCore_1.65.0 ragg_1.2.7 +## [21] purrr_1.0.2 xfun_0.41 +## [23] MultiAssayExperiment_1.29.0 zlibbioc_1.49.0 +## [25] cachem_1.0.8 GenomeInfoDb_1.39.5 +## [27] jsonlite_1.8.8 progress_1.2.3 +## [29] highr_0.10 DelayedArray_0.29.0 +## [31] prettyunits_1.2.0 parallel_4.4.0 +## [33] cluster_2.1.6 R6_2.5.1 +## [35] RColorBrewer_1.1-3 bslib_0.6.1 +## [37] stringi_1.8.3 limma_3.59.1 +## [39] GenomicRanges_1.55.2 jquerylib_0.1.4 +## [41] bookdown_0.37 SummarizedExperiment_1.33.3 +## [43] iterators_1.0.14 knitr_1.45 +## [45] IRanges_2.37.1 splines_4.4.0 +## [47] Matrix_1.6-5 igraph_1.6.0 +## [49] tidyselect_1.2.0 abind_1.4-5 +## [51] yaml_2.3.8 doParallel_1.0.17 +## [53] codetools_0.2-19 affy_1.81.0 +## [55] lattice_0.22-5 tibble_3.2.1 +## [57] plyr_1.8.9 evaluate_0.23 +## [59] survival_3.5-7 desc_1.4.3 +## [61] Spectra_1.13.3 pillar_1.9.0 +## [63] affyio_1.73.0 BiocManager_1.30.22 +## [65] MatrixGenerics_1.15.0 foreach_1.5.2 +## [67] MALDIquant_1.22.2 ncdf4_1.22 +## [69] generics_0.1.3 RCurl_1.98-1.14 ## [71] hms_1.1.3 ggplot2_3.4.4 ## [73] munsell_0.5.0 scales_1.3.0 -## [75] MsExperiment_1.5.1 glue_1.6.2 +## [75] MsExperiment_1.5.4 glue_1.7.0 ## [77] lazyeval_0.2.2 tools_4.4.0 -## [79] mzID_1.41.0 robustbase_0.99-1 -## [81] QFeatures_1.13.1 vsn_3.71.0 +## [79] robustbase_0.99-1 mzID_1.41.0 +## [81] QFeatures_1.13.2 vsn_3.71.0 ## [83] RANN_2.6.1 fs_1.6.3 -## [85] XML_3.99-0.16 grid_4.4.0 -## [87] impute_1.77.0 MsCoreUtils_1.15.1 +## [85] XML_3.99-0.16.1 grid_4.4.0 +## [87] impute_1.77.0 MsCoreUtils_1.15.3 ## [89] 
colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [91] cli_3.6.1 textshaping_0.3.7 -## [93] fansi_1.0.5 S4Arrays_1.3.1 -## [95] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [97] pcaMethods_1.95.0 gtable_0.3.4 -## [99] DEoptimR_1.1-3 sass_0.4.8 -## [101] digest_0.6.33 SparseArray_1.3.1 -## [103] farver_2.1.1 multtest_2.59.0 -## [105] memoise_2.0.1 htmltools_0.5.7 -## [107] pkgdown_2.0.7.9000 lifecycle_1.0.4 -## [109] statmod_1.5.0 MASS_7.3-60.1 +## [91] PSMatch_1.7.1 cli_3.6.2 +## [93] textshaping_0.3.7 fansi_1.0.6 +## [95] S4Arrays_1.3.2 dplyr_1.1.4 +## [97] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [99] pcaMethods_1.95.0 gtable_0.3.4 +## [101] sass_0.4.8 digest_0.6.34 +## [103] SparseArray_1.3.3 farver_2.1.1 +## [105] multtest_2.59.0 memoise_2.0.1 +## [107] htmltools_0.5.7 pkgdown_2.0.7.9000 +## [109] lifecycle_1.0.4 statmod_1.5.0 +## [111] MASS_7.3-60.2 References diff --git a/articles/LC-MS-feature-grouping_files/figure-html/example-1-eic-1.png b/articles/LC-MS-feature-grouping_files/figure-html/example-1-eic-1.png index 4b3830af2..6e96a7c44 100644 Binary files a/articles/LC-MS-feature-grouping_files/figure-html/example-1-eic-1.png and b/articles/LC-MS-feature-grouping_files/figure-html/example-1-eic-1.png differ diff --git a/articles/LC-MS-feature-grouping_files/figure-html/example-1-eic-norm-1.png b/articles/LC-MS-feature-grouping_files/figure-html/example-1-eic-norm-1.png index aa0b2830f..cb3ee7c96 100644 Binary files a/articles/LC-MS-feature-grouping_files/figure-html/example-1-eic-norm-1.png and b/articles/LC-MS-feature-grouping_files/figure-html/example-1-eic-norm-1.png differ diff --git a/articles/LC-MS-feature-grouping_files/figure-html/example-2-eic-1.png b/articles/LC-MS-feature-grouping_files/figure-html/example-2-eic-1.png index 5d237c612..214bcb7a3 100644 Binary files a/articles/LC-MS-feature-grouping_files/figure-html/example-2-eic-1.png and b/articles/LC-MS-feature-grouping_files/figure-html/example-2-eic-1.png differ diff --git 
a/articles/LC-MS-feature-grouping_files/figure-html/example-2-eic-norm-1.png b/articles/LC-MS-feature-grouping_files/figure-html/example-2-eic-norm-1.png index 67778e54d..1ec5c7311 100644 Binary files a/articles/LC-MS-feature-grouping_files/figure-html/example-2-eic-norm-1.png and b/articles/LC-MS-feature-grouping_files/figure-html/example-2-eic-norm-1.png differ diff --git a/articles/index.html b/articles/index.html index 7a8f8ade7..eae4a6b0a 100644 --- a/articles/index.html +++ b/articles/index.html @@ -10,7 +10,7 @@ xcms - 4.1.4 + 4.1.6 diff --git a/articles/xcms-direct-injection.html b/articles/xcms-direct-injection.html index 706216b57..8cca6e609 100644 --- a/articles/xcms-direct-injection.html +++ b/articles/xcms-direct-injection.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -251,8 +251,8 @@ Correspondence## Number of spectra: 4 ## MSn retention times: -1:59 - -1:59 minutes ## - - - Processing information - - - -## Data loaded [Thu Dec 7 08:45:30 2023] -## MSnbase version: 2.29.1 +## Data loaded [Fri Jan 26 20:08:58 2024] +## MSnbase version: 2.29.3 ## - - - Meta data - - - ## phenoData ## rowNames: 1 2 3 4 @@ -430,7 +430,7 @@ Session information sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -454,67 +454,68 @@ Session information## [8] base ## ## other attached packages: -## [1] MassSpecWavelet_1.69.0 xcms_4.1.4 MSnbase_2.29.1 -## [4] ProtGenerics_1.35.0 S4Vectors_0.41.2 mzR_2.37.0 -## [7] Rcpp_1.0.11 Biobase_2.63.0 BiocGenerics_0.49.1 +## [1] MassSpecWavelet_1.69.0 xcms_4.1.6 MSnbase_2.29.3 +## [4] ProtGenerics_1.35.2 S4Vectors_0.41.3 mzR_2.37.0 +## [7] Rcpp_1.0.12 Biobase_2.63.0 BiocGenerics_0.49.1 ## [10] BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] matrixStats_1.1.0 compiler_4.4.0 -## [7] 
systemfonts_1.0.5 vctrs_0.6.5 -## [9] stringr_1.5.1 MetaboCoreUtils_1.11.0 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 signal_1.8-0 -## [57] evaluate_0.23 survival_3.5-7 -## [59] desc_1.4.2 Spectra_1.13.2 -## [61] pillar_1.9.0 affyio_1.73.0 -## [63] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [65] foreach_1.5.2 MALDIquant_1.22.1 -## [67] ncdf4_1.22 generics_0.1.3 -## [69] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 matrixStats_1.2.0 +## [7] compiler_4.4.0 systemfonts_1.0.5 +## [9] vctrs_0.6.5 stringr_1.5.1 +## [11] MetaboCoreUtils_1.11.2 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] 
jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] signal_1.8-0 evaluate_0.23 +## [59] survival_3.5-7 desc_1.4.3 +## [61] Spectra_1.13.3 pillar_1.9.0 +## [63] affyio_1.73.0 BiocManager_1.30.22 +## [65] MatrixGenerics_1.15.0 foreach_1.5.2 +## [67] MALDIquant_1.22.2 ncdf4_1.22 +## [69] generics_0.1.3 RCurl_1.98-1.14 ## [71] hms_1.1.3 ggplot2_3.4.4 ## [73] munsell_0.5.0 scales_1.3.0 -## [75] MsExperiment_1.5.1 glue_1.6.2 -## [77] lazyeval_0.2.2 MsFeatures_1.11.0 -## [79] tools_4.4.0 mzID_1.41.0 -## [81] robustbase_0.99-1 QFeatures_1.13.1 +## [75] MsExperiment_1.5.4 glue_1.7.0 +## [77] MsFeatures_1.11.0 lazyeval_0.2.2 +## [79] tools_4.4.0 robustbase_0.99-1 +## [81] mzID_1.41.0 QFeatures_1.13.2 ## [83] vsn_3.71.0 RANN_2.6.1 -## [85] fs_1.6.3 XML_3.99-0.16 +## [85] fs_1.6.3 XML_3.99-0.16.1 ## [87] grid_4.4.0 impute_1.77.0 -## [89] MsCoreUtils_1.15.1 colorspace_2.1-0 -## [91] GenomeInfoDbData_1.2.11 cli_3.6.1 -## [93] textshaping_0.3.7 fansi_1.0.5 -## [95] S4Arrays_1.3.1 dplyr_1.1.4 -## [97] AnnotationFilter_1.27.0 pcaMethods_1.95.0 -## [99] gtable_0.3.4 DEoptimR_1.1-3 -## [101] sass_0.4.8 digest_0.6.33 -## [103] SparseArray_1.3.1 multtest_2.59.0 -## [105] memoise_2.0.1 htmltools_0.5.7 -## [107] pkgdown_2.0.7.9000 lifecycle_1.0.4 -## [109] statmod_1.5.0 MASS_7.3-60.1 +## [89] MsCoreUtils_1.15.3 colorspace_2.1-0 +## [91] GenomeInfoDbData_1.2.11 PSMatch_1.7.1 +## [93] cli_3.6.2 textshaping_0.3.7 +## [95] fansi_1.0.6 S4Arrays_1.3.2 +## [97] dplyr_1.1.4 AnnotationFilter_1.27.0 +## [99] DEoptimR_1.1-3 pcaMethods_1.95.0 +## [101] gtable_0.3.4 sass_0.4.8 +## [103] digest_0.6.34 SparseArray_1.3.3 +## [105] multtest_2.59.0 memoise_2.0.1 +## [107] htmltools_0.5.7 pkgdown_2.0.7.9000 +## 
[109] lifecycle_1.0.4 statmod_1.5.0 +## [111] MASS_7.3-60.2 diff --git a/articles/xcms-direct-injection_files/figure-html/calibrationresult-1.png b/articles/xcms-direct-injection_files/figure-html/calibrationresult-1.png index 3516bb38b..3a215ab4a 100644 Binary files a/articles/xcms-direct-injection_files/figure-html/calibrationresult-1.png and b/articles/xcms-direct-injection_files/figure-html/calibrationresult-1.png differ diff --git a/articles/xcms-lcms-ms.html b/articles/xcms-lcms-ms.html index eacac9276..9b53769ca 100644 --- a/articles/xcms-lcms-ms.html +++ b/articles/xcms-lcms-ms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes Rainer, Michael WittingModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:45:39 2023 +Package: xcmsAuthors: Johannes Rainer, Michael WittingModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:08 2024 Introduction @@ -264,9 +264,9 @@ Analysis of DDA data## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] By default chromPeakSpectra returns all spectra associated with a MS1 chromatographic peak, but parameter method allows to choose and return only one spectrum per @@ -340,9 +340,9 @@ Analysis of DDA data## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time 
[230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] There are 5 MS2 spectra representing fragmentation of the ion(s) measured in our candidate chromatographic peak. We next reduce this to a single MS2 spectrum using the combineSpectra method @@ -368,9 +368,9 @@ Analysis of DDA data## 1 2 418.926 3505 ## ... 34 more variables/columns. ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] ## ...1 more processings. Use 'processingLog' to list all. Mass peaks from all input spectra with a difference in m/z smaller 20 ppm (parameter ppm) were combined into one peak and the @@ -742,9 +742,10 @@ Reconstruction of MS2 spectra rtr <- fenamiphos_ms1_peak[, c("rtmin", "rtmax")] mzr <- fenamiphos_ms1_peak[, c("mzmin", "mzmax")] -fenamiphos_ms1_chr <- chromatogram(swath_data, rt = rtr, mz = mzr) - -rtr <- fenamiphos_ms2_peak[, c("rtmin", "rtmax")] +fenamiphos_ms1_chr <- chromatogram(swath_data, rt = rtr, mz = mzr) +## Processing chromatographic peaks + +rtr <- fenamiphos_ms2_peak[, c("rtmin", "rtmax")] mzr <- fenamiphos_ms2_peak[, c("mzmin", "mzmax")] ## Get the isolationWindowTargetMz for spectra containing the m/z of the ## compound of interest @@ -759,13 +760,14 @@ Reconstruction of MS2 spectra + fenamiphos_ms2_chr <- chromatogram( swath_data, rt = rtr, mz = mzr, msLevel = 2L, isolationWindowTargetMz = rep(299.1, nrow(rtr))) +## Processing chromatographic peaks We can now plot the extracted ion chromatogram of the MS1 and the extracted MS2 data. 
- + plot(rtime(fenamiphos_ms1_chr[1, 1]), intensity(fenamiphos_ms1_chr[1, 1]), xlab = "retention time [s]", ylab = "intensity", pch = 16, @@ -790,7 +792,7 @@ Reconstruction of MS2 spectra + compareChromatograms(fenamiphos_ms2_chr[1, 1], fenamiphos_ms1_chr[1, 1], ALIGNFUNARGS = list(method = "approx")) @@ -808,7 +810,7 @@ Reconstruction of MS2 spectra + swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor = 0.9) swath_spectra ## MSn data (Spectra) with 62 spectra in a MsBackendMemory backend: @@ -827,13 +829,13 @@ Reconstruction of MS2 spectra## CP62 2 574.942 NA ## ... 20 more variables/columns. ## Processing: -## Merge 1 Spectra into one [Thu Dec 7 08:46:06 2023] +## Merge 1 Spectra into one [Fri Jan 26 20:09:40 2024] As a result we got a Spectra object of length equal to the number of MS1 peaks in our data. The length of a spectrum represents the number of peaks it contains. Thus, a length of 0 indicates that no matching peak (MS2 signal) could be found for the respective MS1 chromatographic peak. - + lengths(swath_spectra) ## [1] 0 0 1 1 1 0 0 0 0 0 0 0 3 0 3 4 0 3 0 1 0 9 14 1 0 ## [26] 0 15 4 1 1 2 4 6 15 12 11 2 4 13 0 0 0 0 1 2 0 1 0 0 0 @@ -845,7 +847,7 @@ Reconstruction of MS2 spectra). 
Metadata column "peak_id" contains the ID of the MS1 chromatographic peak: - + spectraData(swath_spectra, c("peak_id", "ms2_peak_id", "ms2_peak_cor")) ## DataFrame with 62 rows and 3 columns ## peak_id ms2_peak_id ms2_peak_cor @@ -864,7 +866,7 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:44:39 2023
Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:08:00 2024
sessionInfo()
## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -841,67 +841,68 @@ Session information## ## other attached packages: ## [1] pheatmap_1.0.12 faahKO_1.43.0 MsFeatures_1.11.0 -## [4] xcms_4.1.4 MSnbase_2.29.1 ProtGenerics_1.35.0 -## [7] S4Vectors_0.41.2 mzR_2.37.0 Rcpp_1.0.11 +## [4] xcms_4.1.6 MSnbase_2.29.3 ProtGenerics_1.35.2 +## [7] S4Vectors_0.41.3 mzR_2.37.0 Rcpp_1.0.12 ## [10] Biobase_2.63.0 BiocGenerics_0.49.1 BiocParallel_1.37.0 ## [13] BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] MetaboCoreUtils_1.11.0 pkgconfig_2.0.3 -## [13] crayon_1.5.2 fastmap_1.1.1 -## [15] XVector_0.43.0 utf8_1.2.4 -## [17] rmarkdown_2.25 preprocessCore_1.65.0 -## [19] ragg_1.2.6 purrr_1.0.2 -## [21] MultiAssayExperiment_1.29.0 xfun_0.41 -## [23] zlibbioc_1.49.0 cachem_1.0.8 -## [25] GenomeInfoDb_1.39.1 jsonlite_1.8.8 -## [27] progress_1.2.3 highr_0.10 -## [29] DelayedArray_0.29.0 prettyunits_1.2.0 -## [31] parallel_4.4.0 cluster_2.1.6 -## [33] R6_2.5.1 bslib_0.6.1 -## [35] stringi_1.8.2 RColorBrewer_1.1-3 -## [37] limma_3.59.1 GenomicRanges_1.55.1 -## [39] jquerylib_0.1.4 bookdown_0.37 -## [41] SummarizedExperiment_1.33.1 iterators_1.0.14 -## [43] knitr_1.45 IRanges_2.37.0 -## [45] igraph_1.5.1 splines_4.4.0 -## [47] Matrix_1.6-4 tidyselect_1.2.0 -## [49] abind_1.4-5 yaml_2.3.7 -## [51] doParallel_1.0.17 codetools_0.2-19 -## [53] affy_1.81.0 lattice_0.22-5 -## [55] tibble_3.2.1 plyr_1.8.9 -## [57] evaluate_0.23 survival_3.5-7 -## [59] desc_1.4.2 Spectra_1.13.2 -## [61] pillar_1.9.0 affyio_1.73.0 -## [63] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [65] foreach_1.5.2 MALDIquant_1.22.1 -## [67] ncdf4_1.22 generics_0.1.3 -## [69] 
rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 MetaboCoreUtils_1.11.2 +## [13] pkgconfig_2.0.3 crayon_1.5.2 +## [15] fastmap_1.1.1 XVector_0.43.1 +## [17] utf8_1.2.4 rmarkdown_2.25 +## [19] preprocessCore_1.65.0 ragg_1.2.7 +## [21] purrr_1.0.2 xfun_0.41 +## [23] MultiAssayExperiment_1.29.0 zlibbioc_1.49.0 +## [25] cachem_1.0.8 GenomeInfoDb_1.39.5 +## [27] jsonlite_1.8.8 progress_1.2.3 +## [29] highr_0.10 DelayedArray_0.29.0 +## [31] prettyunits_1.2.0 parallel_4.4.0 +## [33] cluster_2.1.6 R6_2.5.1 +## [35] RColorBrewer_1.1-3 bslib_0.6.1 +## [37] stringi_1.8.3 limma_3.59.1 +## [39] GenomicRanges_1.55.2 jquerylib_0.1.4 +## [41] bookdown_0.37 SummarizedExperiment_1.33.3 +## [43] iterators_1.0.14 knitr_1.45 +## [45] IRanges_2.37.1 splines_4.4.0 +## [47] Matrix_1.6-5 igraph_1.6.0 +## [49] tidyselect_1.2.0 abind_1.4-5 +## [51] yaml_2.3.8 doParallel_1.0.17 +## [53] codetools_0.2-19 affy_1.81.0 +## [55] lattice_0.22-5 tibble_3.2.1 +## [57] plyr_1.8.9 evaluate_0.23 +## [59] survival_3.5-7 desc_1.4.3 +## [61] Spectra_1.13.3 pillar_1.9.0 +## [63] affyio_1.73.0 BiocManager_1.30.22 +## [65] MatrixGenerics_1.15.0 foreach_1.5.2 +## [67] MALDIquant_1.22.2 ncdf4_1.22 +## [69] generics_0.1.3 RCurl_1.98-1.14 ## [71] hms_1.1.3 ggplot2_3.4.4 ## [73] munsell_0.5.0 scales_1.3.0 -## [75] MsExperiment_1.5.1 glue_1.6.2 +## [75] MsExperiment_1.5.4 glue_1.7.0 ## [77] lazyeval_0.2.2 tools_4.4.0 -## [79] mzID_1.41.0 robustbase_0.99-1 -## [81] QFeatures_1.13.1 vsn_3.71.0 +## [79] robustbase_0.99-1 mzID_1.41.0 +## [81] QFeatures_1.13.2 vsn_3.71.0 ## [83] RANN_2.6.1 fs_1.6.3 -## [85] XML_3.99-0.16 grid_4.4.0 -## [87] impute_1.77.0 MsCoreUtils_1.15.1 +## [85] XML_3.99-0.16.1 grid_4.4.0 +## [87] impute_1.77.0 MsCoreUtils_1.15.3 ## [89] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [91] cli_3.6.1 
textshaping_0.3.7 -## [93] fansi_1.0.5 S4Arrays_1.3.1 -## [95] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [97] pcaMethods_1.95.0 gtable_0.3.4 -## [99] DEoptimR_1.1-3 sass_0.4.8 -## [101] digest_0.6.33 SparseArray_1.3.1 -## [103] farver_2.1.1 multtest_2.59.0 -## [105] memoise_2.0.1 htmltools_0.5.7 -## [107] pkgdown_2.0.7.9000 lifecycle_1.0.4 -## [109] statmod_1.5.0 MASS_7.3-60.1 +## [91] PSMatch_1.7.1 cli_3.6.2 +## [93] textshaping_0.3.7 fansi_1.0.6 +## [95] S4Arrays_1.3.2 dplyr_1.1.4 +## [97] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [99] pcaMethods_1.95.0 gtable_0.3.4 +## [101] sass_0.4.8 digest_0.6.34 +## [103] SparseArray_1.3.3 farver_2.1.1 +## [105] multtest_2.59.0 memoise_2.0.1 +## [107] htmltools_0.5.7 pkgdown_2.0.7.9000 +## [109] lifecycle_1.0.4 statmod_1.5.0 +## [111] MASS_7.3-60.2
## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -841,67 +841,68 @@ Session information## ## other attached packages: ## [1] pheatmap_1.0.12 faahKO_1.43.0 MsFeatures_1.11.0 -## [4] xcms_4.1.4 MSnbase_2.29.1 ProtGenerics_1.35.0 -## [7] S4Vectors_0.41.2 mzR_2.37.0 Rcpp_1.0.11 +## [4] xcms_4.1.6 MSnbase_2.29.3 ProtGenerics_1.35.2 +## [7] S4Vectors_0.41.3 mzR_2.37.0 Rcpp_1.0.12 ## [10] Biobase_2.63.0 BiocGenerics_0.49.1 BiocParallel_1.37.0 ## [13] BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] MetaboCoreUtils_1.11.0 pkgconfig_2.0.3 -## [13] crayon_1.5.2 fastmap_1.1.1 -## [15] XVector_0.43.0 utf8_1.2.4 -## [17] rmarkdown_2.25 preprocessCore_1.65.0 -## [19] ragg_1.2.6 purrr_1.0.2 -## [21] MultiAssayExperiment_1.29.0 xfun_0.41 -## [23] zlibbioc_1.49.0 cachem_1.0.8 -## [25] GenomeInfoDb_1.39.1 jsonlite_1.8.8 -## [27] progress_1.2.3 highr_0.10 -## [29] DelayedArray_0.29.0 prettyunits_1.2.0 -## [31] parallel_4.4.0 cluster_2.1.6 -## [33] R6_2.5.1 bslib_0.6.1 -## [35] stringi_1.8.2 RColorBrewer_1.1-3 -## [37] limma_3.59.1 GenomicRanges_1.55.1 -## [39] jquerylib_0.1.4 bookdown_0.37 -## [41] SummarizedExperiment_1.33.1 iterators_1.0.14 -## [43] knitr_1.45 IRanges_2.37.0 -## [45] igraph_1.5.1 splines_4.4.0 -## [47] Matrix_1.6-4 tidyselect_1.2.0 -## [49] abind_1.4-5 yaml_2.3.7 -## [51] doParallel_1.0.17 codetools_0.2-19 -## [53] affy_1.81.0 lattice_0.22-5 -## [55] tibble_3.2.1 plyr_1.8.9 -## [57] evaluate_0.23 survival_3.5-7 -## [59] desc_1.4.2 Spectra_1.13.2 -## [61] pillar_1.9.0 affyio_1.73.0 -## [63] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [65] foreach_1.5.2 MALDIquant_1.22.1 -## [67] ncdf4_1.22 generics_0.1.3 -## [69] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 
bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 MetaboCoreUtils_1.11.2 +## [13] pkgconfig_2.0.3 crayon_1.5.2 +## [15] fastmap_1.1.1 XVector_0.43.1 +## [17] utf8_1.2.4 rmarkdown_2.25 +## [19] preprocessCore_1.65.0 ragg_1.2.7 +## [21] purrr_1.0.2 xfun_0.41 +## [23] MultiAssayExperiment_1.29.0 zlibbioc_1.49.0 +## [25] cachem_1.0.8 GenomeInfoDb_1.39.5 +## [27] jsonlite_1.8.8 progress_1.2.3 +## [29] highr_0.10 DelayedArray_0.29.0 +## [31] prettyunits_1.2.0 parallel_4.4.0 +## [33] cluster_2.1.6 R6_2.5.1 +## [35] RColorBrewer_1.1-3 bslib_0.6.1 +## [37] stringi_1.8.3 limma_3.59.1 +## [39] GenomicRanges_1.55.2 jquerylib_0.1.4 +## [41] bookdown_0.37 SummarizedExperiment_1.33.3 +## [43] iterators_1.0.14 knitr_1.45 +## [45] IRanges_2.37.1 splines_4.4.0 +## [47] Matrix_1.6-5 igraph_1.6.0 +## [49] tidyselect_1.2.0 abind_1.4-5 +## [51] yaml_2.3.8 doParallel_1.0.17 +## [53] codetools_0.2-19 affy_1.81.0 +## [55] lattice_0.22-5 tibble_3.2.1 +## [57] plyr_1.8.9 evaluate_0.23 +## [59] survival_3.5-7 desc_1.4.3 +## [61] Spectra_1.13.3 pillar_1.9.0 +## [63] affyio_1.73.0 BiocManager_1.30.22 +## [65] MatrixGenerics_1.15.0 foreach_1.5.2 +## [67] MALDIquant_1.22.2 ncdf4_1.22 +## [69] generics_0.1.3 RCurl_1.98-1.14 ## [71] hms_1.1.3 ggplot2_3.4.4 ## [73] munsell_0.5.0 scales_1.3.0 -## [75] MsExperiment_1.5.1 glue_1.6.2 +## [75] MsExperiment_1.5.4 glue_1.7.0 ## [77] lazyeval_0.2.2 tools_4.4.0 -## [79] mzID_1.41.0 robustbase_0.99-1 -## [81] QFeatures_1.13.1 vsn_3.71.0 +## [79] robustbase_0.99-1 mzID_1.41.0 +## [81] QFeatures_1.13.2 vsn_3.71.0 ## [83] RANN_2.6.1 fs_1.6.3 -## [85] XML_3.99-0.16 grid_4.4.0 -## [87] impute_1.77.0 MsCoreUtils_1.15.1 +## [85] XML_3.99-0.16.1 grid_4.4.0 +## [87] impute_1.77.0 MsCoreUtils_1.15.3 ## [89] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [91] cli_3.6.1 textshaping_0.3.7 -## [93] fansi_1.0.5 S4Arrays_1.3.1 -## [95] 
dplyr_1.1.4 AnnotationFilter_1.27.0 -## [97] pcaMethods_1.95.0 gtable_0.3.4 -## [99] DEoptimR_1.1-3 sass_0.4.8 -## [101] digest_0.6.33 SparseArray_1.3.1 -## [103] farver_2.1.1 multtest_2.59.0 -## [105] memoise_2.0.1 htmltools_0.5.7 -## [107] pkgdown_2.0.7.9000 lifecycle_1.0.4 -## [109] statmod_1.5.0 MASS_7.3-60.1
## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -454,67 +454,68 @@ Session information## [8] base ## ## other attached packages: -## [1] MassSpecWavelet_1.69.0 xcms_4.1.4 MSnbase_2.29.1 -## [4] ProtGenerics_1.35.0 S4Vectors_0.41.2 mzR_2.37.0 -## [7] Rcpp_1.0.11 Biobase_2.63.0 BiocGenerics_0.49.1 +## [1] MassSpecWavelet_1.69.0 xcms_4.1.6 MSnbase_2.29.3 +## [4] ProtGenerics_1.35.2 S4Vectors_0.41.3 mzR_2.37.0 +## [7] Rcpp_1.0.12 Biobase_2.63.0 BiocGenerics_0.49.1 ## [10] BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] matrixStats_1.1.0 compiler_4.4.0 -## [7] systemfonts_1.0.5 vctrs_0.6.5 -## [9] stringr_1.5.1 MetaboCoreUtils_1.11.0 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 signal_1.8-0 -## [57] evaluate_0.23 survival_3.5-7 -## [59] desc_1.4.2 Spectra_1.13.2 -## [61] pillar_1.9.0 affyio_1.73.0 -## [63] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [65] foreach_1.5.2 MALDIquant_1.22.1 -## [67] 
ncdf4_1.22 generics_0.1.3 -## [69] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 matrixStats_1.2.0 +## [7] compiler_4.4.0 systemfonts_1.0.5 +## [9] vctrs_0.6.5 stringr_1.5.1 +## [11] MetaboCoreUtils_1.11.2 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] signal_1.8-0 evaluate_0.23 +## [59] survival_3.5-7 desc_1.4.3 +## [61] Spectra_1.13.3 pillar_1.9.0 +## [63] affyio_1.73.0 BiocManager_1.30.22 +## [65] MatrixGenerics_1.15.0 foreach_1.5.2 +## [67] MALDIquant_1.22.2 ncdf4_1.22 +## [69] generics_0.1.3 RCurl_1.98-1.14 ## [71] hms_1.1.3 ggplot2_3.4.4 ## [73] munsell_0.5.0 scales_1.3.0 -## [75] MsExperiment_1.5.1 glue_1.6.2 -## [77] lazyeval_0.2.2 MsFeatures_1.11.0 -## [79] tools_4.4.0 mzID_1.41.0 -## [81] robustbase_0.99-1 QFeatures_1.13.1 +## [75] MsExperiment_1.5.4 glue_1.7.0 +## [77] MsFeatures_1.11.0 lazyeval_0.2.2 +## [79] tools_4.4.0 robustbase_0.99-1 +## [81] mzID_1.41.0 QFeatures_1.13.2 ## [83] vsn_3.71.0 RANN_2.6.1 -## [85] fs_1.6.3 XML_3.99-0.16 +## [85] fs_1.6.3 XML_3.99-0.16.1 ## [87] grid_4.4.0 impute_1.77.0 -## [89] MsCoreUtils_1.15.1 colorspace_2.1-0 -## [91] 
GenomeInfoDbData_1.2.11 cli_3.6.1 -## [93] textshaping_0.3.7 fansi_1.0.5 -## [95] S4Arrays_1.3.1 dplyr_1.1.4 -## [97] AnnotationFilter_1.27.0 pcaMethods_1.95.0 -## [99] gtable_0.3.4 DEoptimR_1.1-3 -## [101] sass_0.4.8 digest_0.6.33 -## [103] SparseArray_1.3.1 multtest_2.59.0 -## [105] memoise_2.0.1 htmltools_0.5.7 -## [107] pkgdown_2.0.7.9000 lifecycle_1.0.4 -## [109] statmod_1.5.0 MASS_7.3-60.1 +## [89] MsCoreUtils_1.15.3 colorspace_2.1-0 +## [91] GenomeInfoDbData_1.2.11 PSMatch_1.7.1 +## [93] cli_3.6.2 textshaping_0.3.7 +## [95] fansi_1.0.6 S4Arrays_1.3.2 +## [97] dplyr_1.1.4 AnnotationFilter_1.27.0 +## [99] DEoptimR_1.1-3 pcaMethods_1.95.0 +## [101] gtable_0.3.4 sass_0.4.8 +## [103] digest_0.6.34 SparseArray_1.3.3 +## [105] multtest_2.59.0 memoise_2.0.1 +## [107] htmltools_0.5.7 pkgdown_2.0.7.9000 +## [109] lifecycle_1.0.4 statmod_1.5.0 +## [111] MASS_7.3-60.2
## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -454,67 +454,68 @@ Session information## [8] base ## ## other attached packages: -## [1] MassSpecWavelet_1.69.0 xcms_4.1.4 MSnbase_2.29.1 -## [4] ProtGenerics_1.35.0 S4Vectors_0.41.2 mzR_2.37.0 -## [7] Rcpp_1.0.11 Biobase_2.63.0 BiocGenerics_0.49.1 +## [1] MassSpecWavelet_1.69.0 xcms_4.1.6 MSnbase_2.29.3 +## [4] ProtGenerics_1.35.2 S4Vectors_0.41.3 mzR_2.37.0 +## [7] Rcpp_1.0.12 Biobase_2.63.0 BiocGenerics_0.49.1 ## [10] BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] matrixStats_1.1.0 compiler_4.4.0 -## [7] systemfonts_1.0.5 vctrs_0.6.5 -## [9] stringr_1.5.1 MetaboCoreUtils_1.11.0 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 signal_1.8-0 -## [57] evaluate_0.23 survival_3.5-7 -## [59] desc_1.4.2 Spectra_1.13.2 -## [61] pillar_1.9.0 affyio_1.73.0 -## [63] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [65] foreach_1.5.2 MALDIquant_1.22.1 -## [67] ncdf4_1.22 generics_0.1.3 -## [69] rprojroot_2.0.4 
RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 matrixStats_1.2.0 +## [7] compiler_4.4.0 systemfonts_1.0.5 +## [9] vctrs_0.6.5 stringr_1.5.1 +## [11] MetaboCoreUtils_1.11.2 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] signal_1.8-0 evaluate_0.23 +## [59] survival_3.5-7 desc_1.4.3 +## [61] Spectra_1.13.3 pillar_1.9.0 +## [63] affyio_1.73.0 BiocManager_1.30.22 +## [65] MatrixGenerics_1.15.0 foreach_1.5.2 +## [67] MALDIquant_1.22.2 ncdf4_1.22 +## [69] generics_0.1.3 RCurl_1.98-1.14 ## [71] hms_1.1.3 ggplot2_3.4.4 ## [73] munsell_0.5.0 scales_1.3.0 -## [75] MsExperiment_1.5.1 glue_1.6.2 -## [77] lazyeval_0.2.2 MsFeatures_1.11.0 -## [79] tools_4.4.0 mzID_1.41.0 -## [81] robustbase_0.99-1 QFeatures_1.13.1 +## [75] MsExperiment_1.5.4 glue_1.7.0 +## [77] MsFeatures_1.11.0 lazyeval_0.2.2 +## [79] tools_4.4.0 robustbase_0.99-1 +## [81] mzID_1.41.0 QFeatures_1.13.2 ## [83] vsn_3.71.0 RANN_2.6.1 -## [85] fs_1.6.3 XML_3.99-0.16 +## [85] fs_1.6.3 XML_3.99-0.16.1 ## [87] grid_4.4.0 impute_1.77.0 -## [89] MsCoreUtils_1.15.1 colorspace_2.1-0 -## [91] GenomeInfoDbData_1.2.11 cli_3.6.1 -## [93] 
textshaping_0.3.7 fansi_1.0.5 -## [95] S4Arrays_1.3.1 dplyr_1.1.4 -## [97] AnnotationFilter_1.27.0 pcaMethods_1.95.0 -## [99] gtable_0.3.4 DEoptimR_1.1-3 -## [101] sass_0.4.8 digest_0.6.33 -## [103] SparseArray_1.3.1 multtest_2.59.0 -## [105] memoise_2.0.1 htmltools_0.5.7 -## [107] pkgdown_2.0.7.9000 lifecycle_1.0.4 -## [109] statmod_1.5.0 MASS_7.3-60.1
Package: xcmsAuthors: Johannes Rainer, Michael WittingModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:45:39 2023
Package: xcmsAuthors: Johannes Rainer, Michael WittingModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:08 2024
By default chromPeakSpectra returns all spectra associated with an MS1 chromatographic peak, but the parameter method allows choosing and returning only one spectrum per @@ -340,9 +340,9 @@
chromPeakSpectra
method
There are 5 MS2 spectra representing fragmentation of the ion(s) measured in our candidate chromatographic peak. We next reduce this to a single MS2 spectrum using the combineSpectra method @@ -368,9 +368,9 @@
combineSpectra
Mass peaks from all input spectra with a difference in m/z smaller than 20 ppm (parameter ppm) were combined into one peak and the @@ -742,9 +742,10 @@
ppm
rtr <- fenamiphos_ms1_peak[, c("rtmin", "rtmax")] mzr <- fenamiphos_ms1_peak[, c("mzmin", "mzmax")] -fenamiphos_ms1_chr <- chromatogram(swath_data, rt = rtr, mz = mzr) - -rtr <- fenamiphos_ms2_peak[, c("rtmin", "rtmax")] +fenamiphos_ms1_chr <- chromatogram(swath_data, rt = rtr, mz = mzr)
## Processing chromatographic peaks
+rtr <- fenamiphos_ms2_peak[, c("rtmin", "rtmax")] mzr <- fenamiphos_ms2_peak[, c("mzmin", "mzmax")] ## Get the isolationWindowTargetMz for spectra containing the m/z of the ## compound of interest @@ -759,13 +760,14 @@ Reconstruction of MS2 spectra + fenamiphos_ms2_chr <- chromatogram( swath_data, rt = rtr, mz = mzr, msLevel = 2L, isolationWindowTargetMz = rep(299.1, nrow(rtr))) +## Processing chromatographic peaks We can now plot the extracted ion chromatogram of the MS1 and the extracted MS2 data. - + plot(rtime(fenamiphos_ms1_chr[1, 1]), intensity(fenamiphos_ms1_chr[1, 1]), xlab = "retention time [s]", ylab = "intensity", pch = 16, @@ -790,7 +792,7 @@ Reconstruction of MS2 spectra + compareChromatograms(fenamiphos_ms2_chr[1, 1], fenamiphos_ms1_chr[1, 1], ALIGNFUNARGS = list(method = "approx")) @@ -808,7 +810,7 @@ Reconstruction of MS2 spectra + swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor = 0.9) swath_spectra ## MSn data (Spectra) with 62 spectra in a MsBackendMemory backend: @@ -827,13 +829,13 @@ Reconstruction of MS2 spectra## CP62 2 574.942 NA ## ... 20 more variables/columns. ## Processing: -## Merge 1 Spectra into one [Thu Dec 7 08:46:06 2023] +## Merge 1 Spectra into one [Fri Jan 26 20:09:40 2024] As a result we got a Spectra object of length equal to the number of MS1 peaks in our data. The length of a spectrum represents the number of peaks it contains. Thus, a length of 0 indicates that no matching peak (MS2 signal) could be found for the respective MS1 chromatographic peak. - + lengths(swath_spectra) ## [1] 0 0 1 1 1 0 0 0 0 0 0 0 3 0 3 4 0 3 0 1 0 9 14 1 0 ## [26] 0 15 4 1 1 2 4 6 15 12 11 2 4 13 0 0 0 0 1 2 0 1 0 0 0 @@ -845,7 +847,7 @@ Reconstruction of MS2 spectra). 
Metadata column "peak_id" contains the ID of the MS1 chromatographic peak: - + spectraData(swath_spectra, c("peak_id", "ms2_peak_id", "ms2_peak_cor")) ## DataFrame with 62 rows and 3 columns ## peak_id ms2_peak_id ms2_peak_cor @@ -864,7 +866,7 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
rtr <- fenamiphos_ms2_peak[, c("rtmin", "rtmax")] mzr <- fenamiphos_ms2_peak[, c("mzmin", "mzmax")] ## Get the isolationWindowTargetMz for spectra containing the m/z of the ## compound of interest @@ -759,13 +760,14 @@ Reconstruction of MS2 spectra + fenamiphos_ms2_chr <- chromatogram( swath_data, rt = rtr, mz = mzr, msLevel = 2L, isolationWindowTargetMz = rep(299.1, nrow(rtr))) +## Processing chromatographic peaks We can now plot the extracted ion chromatogram of the MS1 and the extracted MS2 data. - + plot(rtime(fenamiphos_ms1_chr[1, 1]), intensity(fenamiphos_ms1_chr[1, 1]), xlab = "retention time [s]", ylab = "intensity", pch = 16, @@ -790,7 +792,7 @@ Reconstruction of MS2 spectra + compareChromatograms(fenamiphos_ms2_chr[1, 1], fenamiphos_ms1_chr[1, 1], ALIGNFUNARGS = list(method = "approx")) @@ -808,7 +810,7 @@ Reconstruction of MS2 spectra + swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor = 0.9) swath_spectra ## MSn data (Spectra) with 62 spectra in a MsBackendMemory backend: @@ -827,13 +829,13 @@ Reconstruction of MS2 spectra## CP62 2 574.942 NA ## ... 20 more variables/columns. ## Processing: -## Merge 1 Spectra into one [Thu Dec 7 08:46:06 2023] +## Merge 1 Spectra into one [Fri Jan 26 20:09:40 2024] As a result we got a Spectra object of length equal to the number of MS1 peaks in our data. The length of a spectrum represents the number of peaks it contains. Thus, a length of 0 indicates that no matching peak (MS2 signal) could be found for the respective MS1 chromatographic peak. - + lengths(swath_spectra) ## [1] 0 0 1 1 1 0 0 0 0 0 0 0 3 0 3 4 0 3 0 1 0 9 14 1 0 ## [26] 0 15 4 1 1 2 4 6 15 12 11 2 4 13 0 0 0 0 1 2 0 1 0 0 0 @@ -845,7 +847,7 @@ Reconstruction of MS2 spectra). 
Metadata column "peak_id" contains the ID of the MS1 chromatographic peak: - + spectraData(swath_spectra, c("peak_id", "ms2_peak_id", "ms2_peak_cor")) ## DataFrame with 62 rows and 3 columns ## peak_id ms2_peak_id ms2_peak_cor @@ -864,7 +866,7 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ fenamiphos_ms2_chr <- chromatogram( swath_data, rt = rtr, mz = mzr, msLevel = 2L, isolationWindowTargetMz = rep(299.1, nrow(rtr))) +## Processing chromatographic peaks We can now plot the extracted ion chromatogram of the MS1 and the extracted MS2 data. - + plot(rtime(fenamiphos_ms1_chr[1, 1]), intensity(fenamiphos_ms1_chr[1, 1]), xlab = "retention time [s]", ylab = "intensity", pch = 16, @@ -790,7 +792,7 @@ Reconstruction of MS2 spectra + compareChromatograms(fenamiphos_ms2_chr[1, 1], fenamiphos_ms1_chr[1, 1], ALIGNFUNARGS = list(method = "approx")) @@ -808,7 +810,7 @@ Reconstruction of MS2 spectra + swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor = 0.9) swath_spectra ## MSn data (Spectra) with 62 spectra in a MsBackendMemory backend: @@ -827,13 +829,13 @@ Reconstruction of MS2 spectra## CP62 2 574.942 NA ## ... 20 more variables/columns. ## Processing: -## Merge 1 Spectra into one [Thu Dec 7 08:46:06 2023] +## Merge 1 Spectra into one [Fri Jan 26 20:09:40 2024] As a result we got a Spectra object of length equal to the number of MS1 peaks in our data. The length of a spectrum represents the number of peaks it contains. Thus, a length of 0 indicates that no matching peak (MS2 signal) could be found for the respective MS1 chromatographic peak. - + lengths(swath_spectra) ## [1] 0 0 1 1 1 0 0 0 0 0 0 0 3 0 3 4 0 3 0 1 0 9 14 1 0 ## [26] 0 15 4 1 1 2 4 6 15 12 11 2 4 13 0 0 0 0 1 2 0 1 0 0 0 @@ -845,7 +847,7 @@ Reconstruction of MS2 spectra). 
Metadata column "peak_id" contains the ID of the MS1 chromatographic peak: - + spectraData(swath_spectra, c("peak_id", "ms2_peak_id", "ms2_peak_cor")) ## DataFrame with 62 rows and 3 columns ## peak_id ms2_peak_id ms2_peak_cor @@ -864,7 +866,7 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
fenamiphos_ms2_chr <- chromatogram( swath_data, rt = rtr, mz = mzr, msLevel = 2L, isolationWindowTargetMz = rep(299.1, nrow(rtr)))
We can now plot the extracted ion chromatogram of the MS1 and the extracted MS2 data.
+ plot(rtime(fenamiphos_ms1_chr[1, 1]), intensity(fenamiphos_ms1_chr[1, 1]), xlab = "retention time [s]", ylab = "intensity", pch = 16, @@ -790,7 +792,7 @@ Reconstruction of MS2 spectra + compareChromatograms(fenamiphos_ms2_chr[1, 1], fenamiphos_ms1_chr[1, 1], ALIGNFUNARGS = list(method = "approx")) @@ -808,7 +810,7 @@ Reconstruction of MS2 spectra + swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor = 0.9) swath_spectra ## MSn data (Spectra) with 62 spectra in a MsBackendMemory backend: @@ -827,13 +829,13 @@ Reconstruction of MS2 spectra## CP62 2 574.942 NA ## ... 20 more variables/columns. ## Processing: -## Merge 1 Spectra into one [Thu Dec 7 08:46:06 2023] +## Merge 1 Spectra into one [Fri Jan 26 20:09:40 2024] As a result we got a Spectra object of length equal to the number of MS1 peaks in our data. The length of a spectrum represents the number of peaks it contains. Thus, a length of 0 indicates that no matching peak (MS2 signal) could be found for the respective MS1 chromatographic peak. - + lengths(swath_spectra) ## [1] 0 0 1 1 1 0 0 0 0 0 0 0 3 0 3 4 0 3 0 1 0 9 14 1 0 ## [26] 0 15 4 1 1 2 4 6 15 12 11 2 4 13 0 0 0 0 1 2 0 1 0 0 0 @@ -845,7 +847,7 @@ Reconstruction of MS2 spectra). 
Metadata column "peak_id" contains the ID of the MS1 chromatographic peak: - + spectraData(swath_spectra, c("peak_id", "ms2_peak_id", "ms2_peak_cor")) ## DataFrame with 62 rows and 3 columns ## peak_id ms2_peak_id ms2_peak_cor @@ -864,7 +866,7 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
plot(rtime(fenamiphos_ms1_chr[1, 1]), intensity(fenamiphos_ms1_chr[1, 1]), xlab = "retention time [s]", ylab = "intensity", pch = 16, @@ -790,7 +792,7 @@ Reconstruction of MS2 spectra + compareChromatograms(fenamiphos_ms2_chr[1, 1], fenamiphos_ms1_chr[1, 1], ALIGNFUNARGS = list(method = "approx")) @@ -808,7 +810,7 @@ Reconstruction of MS2 spectra + swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor = 0.9) swath_spectra ## MSn data (Spectra) with 62 spectra in a MsBackendMemory backend: @@ -827,13 +829,13 @@ Reconstruction of MS2 spectra## CP62 2 574.942 NA ## ... 20 more variables/columns. ## Processing: -## Merge 1 Spectra into one [Thu Dec 7 08:46:06 2023] +## Merge 1 Spectra into one [Fri Jan 26 20:09:40 2024]
+ compareChromatograms(fenamiphos_ms2_chr[1, 1], fenamiphos_ms1_chr[1, 1], ALIGNFUNARGS = list(method = "approx")) @@ -808,7 +810,7 @@ Reconstruction of MS2 spectra + swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor = 0.9) swath_spectra ## MSn data (Spectra) with 62 spectra in a MsBackendMemory backend: @@ -827,13 +829,13 @@ Reconstruction of MS2 spectra## CP62 2 574.942 NA ## ... 20 more variables/columns. ## Processing: -## Merge 1 Spectra into one [Thu Dec 7 08:46:06 2023] +## Merge 1 Spectra into one [Fri Jan 26 20:09:40 2024]
compareChromatograms(fenamiphos_ms2_chr[1, 1], fenamiphos_ms1_chr[1, 1], ALIGNFUNARGS = list(method = "approx"))
+ swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor = 0.9) swath_spectra ## MSn data (Spectra) with 62 spectra in a MsBackendMemory backend: @@ -827,13 +829,13 @@ Reconstruction of MS2 spectra## CP62 2 574.942 NA ## ... 20 more variables/columns. ## Processing: -## Merge 1 Spectra into one [Thu Dec 7 08:46:06 2023] +## Merge 1 Spectra into one [Fri Jan 26 20:09:40 2024]
swath_spectra <- reconstructChromPeakSpectra(swath_data, minCor = 0.9) swath_spectra
## MSn data (Spectra) with 62 spectra in a MsBackendMemory backend: @@ -827,13 +829,13 @@ Reconstruction of MS2 spectra## CP62 2 574.942 NA ## ... 20 more variables/columns. ## Processing: -## Merge 1 Spectra into one [Thu Dec 7 08:46:06 2023]
As a result we got a Spectra object of length equal to the number of MS1 peaks in our data. The length of a spectrum represents the number of peaks it contains. Thus, a length of 0 indicates that no matching peak (MS2 signal) could be found for the respective MS1 chromatographic peak.
Spectra
+ lengths(swath_spectra) ## [1] 0 0 1 1 1 0 0 0 0 0 0 0 3 0 3 4 0 3 0 1 0 9 14 1 0 ## [26] 0 15 4 1 1 2 4 6 15 12 11 2 4 13 0 0 0 0 1 2 0 1 0 0 0 @@ -845,7 +847,7 @@ Reconstruction of MS2 spectra). Metadata column "peak_id" contains the ID of the MS1 chromatographic peak: - + spectraData(swath_spectra, c("peak_id", "ms2_peak_id", "ms2_peak_cor")) ## DataFrame with 62 rows and 3 columns ## peak_id ms2_peak_id ms2_peak_cor @@ -864,7 +866,7 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
lengths(swath_spectra)
## [1] 0 0 1 1 1 0 0 0 0 0 0 0 3 0 3 4 0 3 0 1 0 9 14 1 0 ## [26] 0 15 4 1 1 2 4 6 15 12 11 2 4 13 0 0 0 0 1 2 0 1 0 0 0 @@ -845,7 +847,7 @@ Reconstruction of MS2 spectra). Metadata column "peak_id" contains the ID of the MS1 chromatographic peak: - + spectraData(swath_spectra, c("peak_id", "ms2_peak_id", "ms2_peak_cor")) ## DataFrame with 62 rows and 3 columns ## peak_id ms2_peak_id ms2_peak_cor @@ -864,7 +866,7 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
"peak_id"
+ spectraData(swath_spectra, c("peak_id", "ms2_peak_id", "ms2_peak_cor")) ## DataFrame with 62 rows and 3 columns ## peak_id ms2_peak_id ms2_peak_cor @@ -864,7 +866,7 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
spectraData(swath_spectra, c("peak_id", "ms2_peak_id", "ms2_peak_cor"))
## DataFrame with 62 rows and 3 columns ## peak_id ms2_peak_id ms2_peak_cor @@ -864,7 +866,7 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)] We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@ Reconstruction of MS2 spectra + fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko)))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +between the two approaches, hence we proceed with the analysis using the +fixed bin size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues functions. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
fenamiphos_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(fenamiphos_ms1_peak)]
We can now compare the reconstructed spectrum to the example @@ -874,10 +876,10 @@
+ fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun) - + par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
fenamiphos_swath_spectrum <- addProcessing(fenamiphos_swath_spectrum, scale_fun)
+ par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
par(mfrow = c(1, 2)) plotSpectraMirror(fenamiphos_swath_spectrum, ex_spectrum, ppm = 50, main = "against DDA") @@ -893,7 +895,7 @@ Reconstruction of MS2 spectra + pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. 
- + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, also adjusts the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko)))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ ## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches, hence we proceed with the analysis using the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues functions. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can also take a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids ## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. - + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 
20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. - + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) 
{ @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] 
prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] 
MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 
Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
pk_ids <- fenamiphos_swath_spectrum$ms2_peak_id[[1]] pk_ids
## [1] "CP199" "CP201" "CP211" "CP208" "CP200" "CP202" "CP217" "CP215" "CP205" @@ -910,7 +912,7 @@ Reconstruction of MS2 spectra + rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. - + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 
20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. - + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) 
{ @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] 
prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] 
MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 
Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range))) +## Processing chromatographic peaks Each row of this ms2_eics contains now the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot. - + plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. - + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high 
number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. - + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) 
{ @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] 
prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] 
MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 
Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
rt_range <- chromPeaks(swath_data)[pk_ids, c("rtmin", "rtmax")] mz_range <- chromPeaks(swath_data)[pk_ids, c("mzmin", "mzmax")] @@ -925,10 +927,11 @@ Reconstruction of MS2 spectrams2_eics <- chromatogram( swath_data, rt = rt_range, mz = mz_range, msLevel = 2L, isolationWindowTargetMz = rep(tmz, nrow(rt_range)))
Each row of ms2_eics now contains the EIC of one of the MS2 chromatographic peaks. We can also plot these in an overlay plot.
ms2_eics
+ plotChromatogramsOverlay(ms2_eics) @@ -940,7 +943,7 @@ Reconstruction of MS2 spectraProchloraz). We first identify the MS1 chromatographic peak for that m/z and retrieve the reconstructed MS2 spectrum for that peak. - + prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
plotChromatogramsOverlay(ms2_eics)
@@ -940,7 +943,7 @@
+ prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1 - + prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
prochloraz_mz <- 376.0381 prochloraz_ms1_peak <- chromPeaks(swath_data, msLevel = 1L, @@ -950,7 +953,7 @@ Reconstruction of MS2 spectra## CP22 376.0373 376.037 376.0374 405.046 401.446 409.546 3664.051 3655.951 ## maxo sn sample ## CP22 897.3923 278 1
+ prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum) @@ -960,7 +963,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024] In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum. - + prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. 
- + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
prochloraz_swath_spectrum <- swath_spectra[ swath_spectra$peak_id == rownames(prochloraz_ms1_peak)] lengths(prochloraz_swath_spectrum)
+ prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak @@ -971,7 +974,7 @@ Reconstruction of MS2 spectra + prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023] +## Filter: select retention time [230..610] on MS level(s) 1 2 [Fri Jan 26 20:09:22 2024] +## Filter: select MS level(s) 2 [Fri Jan 26 20:09:31 2024] +## Merge 1 Spectra into one [Fri Jan 26 20:09:31 2024]
prochloraz_dda_peak <- chromPeaks(dda_data, msLevel = 1L, mz = prochloraz_mz, ppm = 5) prochloraz_dda_peak
+ prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra @@ -988,12 +991,12 @@ Reconstruction of MS2 spectra## file(s): ## PestMix1_DDA.mzML ## Processing: -## Filter: select retention time [230..610] on MS level(s) 1 2 [Thu Dec 7 08:45:51 2023] -## Filter: select MS level(s) 2 [Thu Dec 7 08:45:58 2023] -## Merge 1 Spectra into one [Thu Dec 7 08:45:58 2023]
prochloraz_dda_spectra <- dda_spectra[ dda_spectra$peak_id == rownames(prochloraz_dda_peak)] prochloraz_dda_spectra
In total 5 spectra were measured, some with a relatively high number of peaks. Next we combine them into a consensus spectrum.
+ prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead. At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin. - + prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) 
{ @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] 
prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] 
MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 
Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
prochloraz_dda_spectrum <- combineSpectra( prochloraz_dda_spectra, FUN = combinePeaks, ppm = 20, peaks = "intersect", minProp = 0.8, intensityFun = median, mzFun = median, @@ -1003,7 +1006,7 @@ Reconstruction of MS2 spectra## 'combinePeaksData' instead.
At last we load also the Prochloraz MS2 spectra (for different collision energies) from Metlin.
+ prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf()) @@ -1011,7 +1014,7 @@ Reconstruction of MS2 spectra + prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 
MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] 
ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] 
textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
prochloraz <- Spectra( system.file("mgf", "metlin-68898.mgf", package = "xcms"), source = MsBackendMgf())
+ prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 
Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] 
progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] 
sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, 
MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. - + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
prochloraz_swath_spectrum <- addProcessing(prochloraz_swath_spectrum, scale_fun) prochloraz_dda_spectrum <- addProcessing(prochloraz_dda_spectrum, scale_fun) @@ -1032,7 +1035,7 @@ Reconstruction of MS2 spectra + plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 
Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] 
progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] 
sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, 
MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. - + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ plotSpectra(prochloraz_swath_spectrum) @@ -1045,7 +1048,7 @@ Reconstruction of MS2 spectraMetaboCoreUtils package to check for presence of potential isotope peaks in the reconstructed MS2 spectrum for prochloraz. - + library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] 
MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## 
[39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git 
a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## 
CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. - + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
plotSpectra(prochloraz_swath_spectrum)
@@ -1045,7 +1048,7 @@
+ library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]]) ## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 
rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] 
abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != 
window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. 
- + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. - + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
library(MetaboCoreUtils) isotopologues(peaksData(prochloraz_swath_spectrum)[[1]])
## [[1]] @@ -1059,7 +1062,7 @@ Reconstruction of MS2 spectra + ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 
MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] 
affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes 
RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. 
- + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. - + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +between the two approaches, hence we proceed with the analysis using the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ ## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso) - + par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] 
cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] 
evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 
2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +between the two approaches, hence we proceed with the analysis using the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## Function to keep only the first (monoisotopic) peak for potential ## isotopologue peak groups. rem_iso <- function(x, ...) { @@ -1071,7 +1074,7 @@ Reconstruction of MS2 spectra} prochloraz_swath_spectrum2 <- addProcessing(prochloraz_swath_spectrum, rem_iso)
+ par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2) @@ -1085,10 +1088,10 @@ Reconstruction of MS2 spectra + compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## 
[37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] 
munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. 
Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches, hence we proceed with the analysis using the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
par(mfrow = c(1, 2)) plotSpectra(prochloraz_swath_spectrum) plotSpectra(prochloraz_swath_spectrum2)
+ compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum) ## [1] 0.4623719 - + compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 
igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] 
QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches, hence we proceed with the analysis using the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
compareSpectra(prochloraz_swath_spectrum, prochloraz_dda_spectrum)
## [1] 0.4623719
+ compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum) ## [1] 0.5932303 Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@ Outlook Session information - + sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 
-## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 
tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2 References diff --git a/articles/xcms.html b/articles/xcms.html index 87e672e2a..4939f55d6 100644 --- a/articles/xcms.html +++ b/articles/xcms.html @@ -33,7 +33,7 @@ xcms - 4.1.4 + 4.1.6 @@ -100,7 +100,7 @@ if (links[i].hostname != window.location.hostname) links[i].target = '_blank'; }); -Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023 +Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024 Introduction @@ -125,7 +125,8 @@ Introductionxcms tutorial +available with more examples and details. Pre-processing of LC-MS data @@ -470,7 +471,7 @@ Chromatographic peak detectiontutorial for +also this tutorial for additional information and examples on choosing and testing peak detection settings. 
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@ Chromatographic peak detection mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -668,11 +673,15 @@ Chromatographic peak detection + mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. 
- + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
compareSpectra(prochloraz_swath_spectrum2, prochloraz_dda_spectrum)
## [1] 0.5932303
Similar to the DDA data, the reconstructed MS2 spectra from SWATH @@ -1125,9 +1128,9 @@
+ sessionInfo() -## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 
affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] 
XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2
## R Under development (unstable) (2023-11-22 r85609) +## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## 
[61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## 
[85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1 +## [89] PSMatch_1.7.1 cli_3.6.2 +## [91] textshaping_0.3.7 fansi_1.0.6 +## [93] S4Arrays_1.3.2 dplyr_1.1.4 +## [95] AnnotationFilter_1.27.0 DEoptimR_1.1-3 +## [97] pcaMethods_1.95.0 gtable_0.3.4 +## [99] sass_0.4.8 digest_0.6.34 +## [101] SparseArray_1.3.3 multtest_2.59.0 +## [103] memoise_2.0.1 htmltools_0.5.7 +## [105] pkgdown_2.0.7.9000 lifecycle_1.0.4 +## [107] statmod_1.5.0 MASS_7.3-60.2
## R Under development (unstable) (2024-01-17 r85813) ## Platform: x86_64-pc-linux-gnu ## Running under: Ubuntu 22.04.3 LTS ## @@ -1151,67 +1154,67 @@ Session information## [8] base ## ## other attached packages: -## [1] MetaboCoreUtils_1.11.0 MsBackendMgf_1.11.0 MsExperiment_1.5.1 -## [4] pander_0.6.5 Spectra_1.13.2 xcms_4.1.4 -## [7] MSnbase_2.29.1 ProtGenerics_1.35.0 S4Vectors_0.41.2 -## [10] mzR_2.37.0 Rcpp_1.0.11 Biobase_2.63.0 +## [1] MetaboCoreUtils_1.11.2 MsBackendMgf_1.11.1 MsExperiment_1.5.4 +## [4] pander_0.6.5 Spectra_1.13.3 xcms_4.1.6 +## [7] MSnbase_2.29.3 ProtGenerics_1.35.2 S4Vectors_0.41.3 +## [10] mzR_2.37.0 Rcpp_1.0.12 Biobase_2.63.0 ## [13] BiocGenerics_0.49.1 BiocParallel_1.37.0 BiocStyle_2.31.0 ## ## loaded via a namespace (and not attached): -## [1] bitops_1.0-7 rlang_1.1.2 -## [3] magrittr_2.0.3 clue_0.3-65 -## [5] MassSpecWavelet_1.69.0 matrixStats_1.1.0 -## [7] compiler_4.4.0 systemfonts_1.0.5 -## [9] vctrs_0.6.5 stringr_1.5.1 -## [11] pkgconfig_2.0.3 crayon_1.5.2 -## [13] fastmap_1.1.1 XVector_0.43.0 -## [15] utf8_1.2.4 rmarkdown_2.25 -## [17] preprocessCore_1.65.0 ragg_1.2.6 -## [19] purrr_1.0.2 MultiAssayExperiment_1.29.0 -## [21] xfun_0.41 zlibbioc_1.49.0 -## [23] cachem_1.0.8 GenomeInfoDb_1.39.1 -## [25] jsonlite_1.8.8 progress_1.2.3 -## [27] highr_0.10 DelayedArray_0.29.0 -## [29] prettyunits_1.2.0 parallel_4.4.0 -## [31] cluster_2.1.6 R6_2.5.1 -## [33] bslib_0.6.1 stringi_1.8.2 -## [35] RColorBrewer_1.1-3 limma_3.59.1 -## [37] GenomicRanges_1.55.1 jquerylib_0.1.4 -## [39] bookdown_0.37 SummarizedExperiment_1.33.1 -## [41] iterators_1.0.14 knitr_1.45 -## [43] IRanges_2.37.0 igraph_1.5.1 -## [45] splines_4.4.0 Matrix_1.6-4 -## [47] tidyselect_1.2.0 abind_1.4-5 -## [49] yaml_2.3.7 doParallel_1.0.17 -## [51] codetools_0.2-19 affy_1.81.0 -## [53] lattice_0.22-5 tibble_3.2.1 -## [55] plyr_1.8.9 evaluate_0.23 -## [57] survival_3.5-7 desc_1.4.2 -## [59] pillar_1.9.0 affyio_1.73.0 -## [61] BiocManager_1.30.22 MatrixGenerics_1.15.0 -## [63] 
foreach_1.5.2 MALDIquant_1.22.1 -## [65] ncdf4_1.22 generics_0.1.3 -## [67] rprojroot_2.0.4 RCurl_1.98-1.13 +## [1] DBI_1.2.1 bitops_1.0-7 +## [3] rlang_1.1.3 magrittr_2.0.3 +## [5] clue_0.3-65 MassSpecWavelet_1.69.0 +## [7] matrixStats_1.2.0 compiler_4.4.0 +## [9] systemfonts_1.0.5 vctrs_0.6.5 +## [11] stringr_1.5.1 pkgconfig_2.0.3 +## [13] crayon_1.5.2 fastmap_1.1.1 +## [15] XVector_0.43.1 utf8_1.2.4 +## [17] rmarkdown_2.25 preprocessCore_1.65.0 +## [19] ragg_1.2.7 purrr_1.0.2 +## [21] xfun_0.41 MultiAssayExperiment_1.29.0 +## [23] zlibbioc_1.49.0 cachem_1.0.8 +## [25] GenomeInfoDb_1.39.5 jsonlite_1.8.8 +## [27] progress_1.2.3 highr_0.10 +## [29] DelayedArray_0.29.0 prettyunits_1.2.0 +## [31] parallel_4.4.0 cluster_2.1.6 +## [33] R6_2.5.1 RColorBrewer_1.1-3 +## [35] bslib_0.6.1 stringi_1.8.3 +## [37] limma_3.59.1 GenomicRanges_1.55.2 +## [39] jquerylib_0.1.4 bookdown_0.37 +## [41] SummarizedExperiment_1.33.3 iterators_1.0.14 +## [43] knitr_1.45 IRanges_2.37.1 +## [45] splines_4.4.0 Matrix_1.6-5 +## [47] igraph_1.6.0 tidyselect_1.2.0 +## [49] abind_1.4-5 yaml_2.3.8 +## [51] doParallel_1.0.17 codetools_0.2-19 +## [53] affy_1.81.0 lattice_0.22-5 +## [55] tibble_3.2.1 plyr_1.8.9 +## [57] evaluate_0.23 survival_3.5-7 +## [59] desc_1.4.3 pillar_1.9.0 +## [61] affyio_1.73.0 BiocManager_1.30.22 +## [63] MatrixGenerics_1.15.0 foreach_1.5.2 +## [65] MALDIquant_1.22.2 ncdf4_1.22 +## [67] generics_0.1.3 RCurl_1.98-1.14 ## [69] hms_1.1.3 ggplot2_3.4.4 ## [71] munsell_0.5.0 scales_1.3.0 -## [73] glue_1.6.2 lazyeval_0.2.2 -## [75] MsFeatures_1.11.0 tools_4.4.0 -## [77] mzID_1.41.0 robustbase_0.99-1 -## [79] QFeatures_1.13.1 vsn_3.71.0 +## [73] glue_1.7.0 MsFeatures_1.11.0 +## [75] lazyeval_0.2.2 tools_4.4.0 +## [77] robustbase_0.99-1 mzID_1.41.0 +## [79] QFeatures_1.13.2 vsn_3.71.0 ## [81] RANN_2.6.1 fs_1.6.3 -## [83] XML_3.99-0.16 grid_4.4.0 -## [85] impute_1.77.0 MsCoreUtils_1.15.1 +## [83] XML_3.99-0.16.1 grid_4.4.0 +## [85] impute_1.77.0 MsCoreUtils_1.15.3 ## [87] 
colorspace_2.1-0 GenomeInfoDbData_1.2.11 -## [89] cli_3.6.1 textshaping_0.3.7 -## [91] fansi_1.0.5 S4Arrays_1.3.1 -## [93] dplyr_1.1.4 AnnotationFilter_1.27.0 -## [95] pcaMethods_1.95.0 gtable_0.3.4 -## [97] DEoptimR_1.1-3 sass_0.4.8 -## [99] digest_0.6.33 SparseArray_1.3.1 -## [101] multtest_2.59.0 memoise_2.0.1 -## [103] htmltools_0.5.7 pkgdown_2.0.7.9000 -## [105] lifecycle_1.0.4 statmod_1.5.0 -## [107] MASS_7.3-60.1
Package: xcmsAuthors: Johannes RainerModified: 2023-12-07 07:39:24.923344Compiled: Thu Dec 7 08:46:13 2023
Package: xcmsAuthors: Johannes RainerModified: 2024-01-26 18:14:18.95093Compiled: Fri Jan 26 20:09:48 2024
Chromatographic peak detection can also be performed on extracted ion @@ -651,9 +652,13 @@
mzr_1 <- 305.1 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1)
+chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1)
chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1)
+par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2)
par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2)
+ mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1) +## Processing chromatographic peaks + +chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) +## Processing chromatographic peaks + +par(mfrow = c(1, 2)) plot(chr_1) plot(chr_2) @@ -686,25 +695,25 @@ Chromatographic peak detection + res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. - + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. 
- + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". - + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). 
- + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
mzr_1 <- 496.2 + c(-0.01, 0.01) -chr_1 <- chromatogram(faahko[1], mz = mzr_1) -chr_2 <- chromatogram(faahko_pp[1], mz = mzr_1) -par(mfrow = c(1, 2)) +chr_1 <- chromatogram(faahko[1], mz = mzr_1)
+ res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1 - + plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. - + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
res <- refineChromPeaks(chr_1, MergeNeighboringPeaksParam(minProp = 0.05)) chromPeaks(res)
## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CPM1 496.2 496.19 496.21 3384.012 3294.809 3412.181 45940118 NA 1128960 177 ## sample row column ## CPM1 1 1 1
+ plot(res) Before proceeding we next replace the faahko object with the results from the peak refinement step. - + faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. - + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
plot(res)
Before proceeding we next replace the faahko object with the results from the peak refinement step.
faahko
+ faahko <- faahko_pp Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths. - + summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
faahko <- faahko_pp
Below we use the data from the chromPeaks matrix to calculate per-file summaries of the peak detection results, such as the number of peaks per file as well as the distribution of the retention time widths.
chromPeaks
+ summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
summary_fun <- function(z) c(peak_count = nrow(z), rt = quantile(z[, "rtmax"] - z[, "rtmin"])) @@ -819,7 +828,7 @@ Chromatographic peak detection + chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900)) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". 
- + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
chromPeaks(faahko, mz = c(334.9, 335.1), rt = c(2700, 2900))
## mz mzmin mzmax rt rtmin rtmax into intb maxo sn ## CP0038 335 335 335 2781.505 2761.160 2809.674 412134.3 383167.4 16856 23 @@ -837,7 +846,7 @@ Chromatographic peak detection + plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". - + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. 
- + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. 
By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ plotChromPeaks(faahko, file = 3) @@ -851,7 +860,7 @@ Chromatographic peak detection + plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". - + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. 
bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. 
By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
plotChromPeaks(faahko, file = 3)
@@ -851,7 +860,7 @@
+ plotChromPeakImage(faahko, binSize = 10) @@ -869,7 +878,7 @@ Chromatographic peak detection + chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". - + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. 
bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. 
By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
plotChromPeakImage(faahko, binSize = 10)
@@ -869,7 +878,7 @@
+ chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7 We can also plot this extracted ion chromatogram which will also visualize all identified chromatographic peaks in that region. - + sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". - + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. 
bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. 
By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
chr_ex <- chromatogram(faahko, mz = mzr, rt = rtr) chromPeaks(chr_ex)
## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -886,7 +895,7 @@ Chromatographic peak detection## CP2349 7 1 7
We can also plot this extracted ion chromatogram, which will additionally visualize all identified chromatographic peaks in that region.
+ sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]]) @@ -905,7 +914,7 @@ Chromatographic peak detection). Below we plot the data again using peakType = "rectangle". - + plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). 
- + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches, hence we proceed with the analysis using the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues functions. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features.
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
sample_colors <- group_colors[chr_ex$sample_group] plot(chr_ex, col = group_colors[chr_raw$sample_group], lwd = 2, peakBg = sample_colors[chromPeaks(chr_ex)[, "sample"]])
peakType = "rectangle"
+ plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA) @@ -919,7 +928,7 @@ Chromatographic peak detection + ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. 
Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches, hence we proceed with the analysis using the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues functions. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features.
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
plot(chr_ex, col = sample_colors, peakType = "rectangle", peakCol = sample_colors[chromPeaks(chr_ex)[, "sample"]], peakBg = NA)
+ ## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. 
- + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. 
For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. 
We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. - + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## Extract a list of per-sample peak intensities (in log2 scale) ints <- split(log2(chromPeaks(faahko)[, "into"]), f = chromPeaks(faahko)[, "sample"]) @@ -961,7 +970,7 @@ Alignment + faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. 
- + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. 
For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. 
We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. - + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6)) Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@ Alignment + ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. 
mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. 
For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. 
We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. - + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
faahko <- adjustRtime(faahko, param = ObiwarpParam(binSize = 0.6))
Note that adjustRtime, besides calculating adjusted retention times for each spectrum, adjusts also the retention times of @@ -970,15 +979,15 @@
adjustRtime
+ ## Extract adjusted retention times adjustedRtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. 
+Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## Extract adjusted retention times adjustedRtime(faahko) |> head()
## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615
+ ## Or simply use the rtime method rtime(faahko) |> head() ## [1] 2551.457 2553.089 2554.720 2556.352 2557.983 2559.615 - + ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. 
This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## Or simply use the rtime method rtime(faahko) |> head()
+ ## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head() ## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281 @@ -989,7 +998,7 @@ Alignment below. - + ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. 
By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## Get raw (unadjusted) retention times rtime(faahko, adjusted = FALSE) |> head()
## [1] 2551.457 2553.022 2554.586 2556.151 2557.716 2559.281
+ ## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. 
By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## Get the base peak chromatograms. bpis_adj <- chromatogram(faahko, aggregationFun = "max", chromPeaks = "none") par(mfrow = c(3, 1), mar = c(4.5, 4.2, 1, 0.5)) @@ -1011,7 +1020,7 @@ Alignment + par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. 
By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). 
The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. 
+ + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
par(mfrow = c(2, 1)) ## Plot the raw data plot(chr_raw, col = sample_colors) @@ -1101,7 +1110,7 @@ Subset-based alignmentrestore the original retention times for identified chromatographic peaks. - + faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). 
The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. 
+ + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. 
+ +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam. If there are no sample groups in the experiment, sampleGroups should be set to a single value for each file (e.g. rep(1, length(fileNames(faahko))). - + ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. 
+ +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
faahko <- dropAdjustedRtime(faahko) ## Define the experimental layout @@ -1119,7 +1128,7 @@ Subset-based alignmentPeakDensityParam
sampleGroups
rep(1, length(fileNames(faahko)))
+ ## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## Initial peak grouping. Use sample_type as grouping variable pdp_subs <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_type, minFraction = 0.9) @@ -1139,7 +1148,7 @@ Subset-based alignment - + clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. 
+ +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. 
+faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups is used to define to which sample group each sample belongs. - + ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. 
+faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. 
- + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
clrs <- rep("#00000040", 8) clrs[sampleData(faahko)$sample_type == "QC"] <- c("#00ce0080") par(mfrow = c(2, 1), mar = c(4, 4.5, 1, 0.5)) @@ -1187,7 +1196,7 @@ CorrespondencesampleGroups
+ ## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. 
For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. 
We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. - + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw will be very data set dependent (or more specifically LC-dependent) and should be adapted to each data -set. See the Metabolomics -pre-processing with xcms tutorial for examples and more -details. - -## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. 
For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. + +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. 
We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. - + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## Define the mz slice. mzr <- c(305.05, 305.15) @@ -1226,14 +1235,60 @@ Correspondencebw
xcms
-## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx". Below we show the information on the first 6 features. - + featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## Perform the correspondence +set. +Another important parameter is binSize that defines the +size of the m/z slices (bins) within which peaks are being grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and hence evaluated for grouping. By +default, a constant m/z bin size is used, but by changing parameter +ppm to a value larger than 0, m/z-relative bin sizes would +be used instead (i.e., the bin size will increase with the m/z value +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) would then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set. +See also the xcms +tutorial for more examples and details. + +## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp) +As an alternative we perform the correspondence using m/z relative +bin sizes. + +## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10)) +The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range for features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +for the two approaches hence we proceed the analysis with the fixed bin +size setting. A stronger relationship would be expected for example for +data measured on TOF instruments. 
+ +## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010") + + +Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. + + Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues function. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@ Correspondence"peakidx"
Another important parameter is binSize, which defines the +size of the m/z slices (bins) within which peaks are grouped. This +parameter thus defines the required similarity in m/z values for the +chromatographic peaks that are then assumed to represent signal from the +same (type of ion of a) compound and are hence evaluated for grouping. By +default, a constant m/z bin size is used, but by setting parameter +ppm to a value larger than 0, m/z-relative bin sizes are +used instead (i.e., the bin size increases with the m/z value, +hence better representing the measurement error/precision of some MS +instruments). The bin sizes (and subsequently the m/z width of the +defined features) then reach a maximal value of +binSize plus ppm parts-per-million of the +largest m/z value of any chromatographic peak in the data set.
binSize
See also the xcms +tutorial for more examples and details.
+## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp)
## Perform the correspondence using fixed m/z bin sizes. pdp <- PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, minFraction = 0.4, bw = 30) faahko <- groupChromPeaks(faahko, param = pdp)
As an alternative, we perform the correspondence using m/z-relative +bin sizes.
+## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10))
## Drop feature definitions and re-perform the correspondence +## using m/z-relative bin sizes. +faahko_ppm <- groupChromPeaks( + dropFeatureDefinitions(faahko), + PeakDensityParam(sampleGroups = sampleData(faahko)$sample_group, + minFraction = 0.4, bw = 30, ppm = 10))
The results will be mostly similar, except for the higher +m/z range (in which larger m/z bins will be used). Below we plot the m/z +range of the features against their median m/z. For the present data set +(acquired with a triple quad instrument) no clear difference can be seen +between the two approaches, hence we proceed with the analysis using the fixed bin +size setting. A stronger relationship would be expected, for example, for +data measured on TOF instruments.
+## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010")
## Calculate m/z width of features +mzw <- featureDefinitions(faahko)$mzmax - featureDefinitions(faahko)$mzmin +mzw_ppm <- featureDefinitions(faahko_ppm)$mzmax - + featureDefinitions(faahko_ppm)$mzmin +plot(featureDefinitions(faahko_ppm)$mzmed, mzw_ppm, + xlab = "m/z", ylab = "m/z width", pch = 21, + col = "#0000ff20", bg = "#0000ff10") +points(featureDefinitions(faahko)$mzmed, mzw, pch = 21, + col = "#ff000020", bg = "#ff000010")
+Relationship between a feature’s m/z and the m/z width (max - min m/z) +of the feature. Red points represent the results with the fixed m/z bin +size, blue with the m/z-relative bin size. +
Results from the correspondence analysis can be accessed with the featureDefinitions and featureValues functions. The former returns a data frame with general information on each of the @@ -1242,7 +1297,7 @@
featureDefinitions
featureValues
+ featureDefinitions(faahko) |> head() ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. 
- + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
featureDefinitions(faahko) |> head()
## mzmed mzmin mzmax rtmed rtmin rtmax npeaks KO WT peakidx ## FT001 200.1 200.1 200.1 2902.634 2882.603 2922.664 2 2 0 458, 1161 @@ -1266,7 +1321,7 @@ Correspondence - + featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. - + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = 
"sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. - + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. 
- + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ featureValues(faahko, value = "into") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. - + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax 
npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. - + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. 
- + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
featureValues(faahko, value = "into") |> head()
## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1306,7 +1361,7 @@ Correspondenceall features in a data set, which can take also a considerable amount of time. Below we extract the chromatograms for the first 4 features. - + feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified. And plot the extracted ion chromatograms. We again use the group color for each identified peak to fill the area. - + plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. - + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + 
colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. - + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. 
- + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms @@ -1333,7 +1388,7 @@ Correspondence## 4 feature(s) identified.
feature_chroms <- featureChromatograms(faahko, features = 1:4) feature_chroms
We then plot the extracted ion chromatograms, again using the group color of each identified peak to fill its area.
+ plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]]) @@ -1343,7 +1398,7 @@ Correspondence To access the EICs of the second feature we can simply subset the feature_chroms object. - + eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
plot(feature_chroms, col = sample_colors, peakBg = sample_colors[chromPeaks(feature_chroms)[, "sample"]])
To access the EICs of the second feature we can simply subset the feature_chroms object.
feature_chroms
+ eic_2 <- feature_chroms[2, ] chromPeaks(eic_2) ## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance ## Loading required package: GenomeInfoDb - + res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. 
- + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 
## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
eic_2 <- feature_chroms[2, ] chromPeaks(eic_2)
## mz mzmin mzmax rt rtmin rtmax into intb maxo sn @@ -1390,7 +1445,7 @@ Gap filling + faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance
+ faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head() @@ -1400,7 +1455,7 @@ Gap filling## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1160580.5 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1160580.5 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1430,7 +1485,7 @@ Final result + library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance
faahko <- fillChromPeaks(faahko, param = ChromPeakAreaParam()) featureValues(faahko, value = "into") |> head()
+ library(SummarizedExperiment) ## Loading required package: MatrixGenerics ## Loading required package: matrixStats @@ -1469,7 +1524,7 @@ Final result## ## distance
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## Loading required package: GenomeInfoDb
+ res <- quantify(faahko, value = "into", method = "sum") res ## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. - + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. 
- + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
res <- quantify(faahko, value = "into", method = "sum") res
## class: SummarizedExperiment @@ -1485,7 +1540,7 @@ Final result + rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. - + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. 
- + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ rowData(res) ## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. - + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. 
- + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
rowData(res)
## DataFrame with 351 rows and 10 columns ## mzmed mzmin mzmax rtmed rtmin rtmax npeaks @@ -1518,7 +1573,7 @@ Final result + colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. - + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. 
- + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ colData(res) ## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. - + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. 
- + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
colData(res)
## DataFrame with 8 rows and 4 columns ## sample_name sample_group spectraOrigin sample_type @@ -1536,13 +1591,13 @@ Final result + assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. - + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. 
- + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ assayNames(res) ## [1] "raw" And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix. - + assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter 
class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
assayNames(res)
## [1] "raw"
And we can access the actual data using the assay function, optionally also providing the name of the assay we want to access. Below we show the first 6 lines of that matrix.
assay
+ assay(res) |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## 
info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
assay(res) |> head()
## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 135162.4 506848.9 111657.3 169955.6 209929.4 141607.9 226853.7 @@ -1550,7 +1605,7 @@ Final result## FT003 213659.3 289500.7 164380.7 178285.7 253825.6 241844.4 240606.0 ## FT004 349011.5 451863.7 343897.8 208002.8 364609.8 360908.9 226234.4 ## FT005 286221.4 285857.6 164009.0 149097.6 255697.7 311296.8 366441.5 -## FT006 1923307.8 1129426.4 380970.3 588986.4 1286883.0 1739516.6 639755.3 +## FT006 1923307.8 1102832.6 380970.3 588986.4 1286883.0 1739516.6 639755.3 ## wt22.CDF ## FT001 138341.2 ## FT002 1354004.9 @@ -1562,14 +1617,14 @@ Final result + assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 
1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum") With that we have now two assays in our result object. - + assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
assays(res)$raw_nofill <- featureValues(faahko, filled = FALSE, method = "sum")
With that we now have two assays in our result object.
+ assayNames(res) ## [1] "raw" "raw_nofill" And we can extract the feature values without gap-filling: - + assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
## [1] "raw" "raw_nofill"
And we can extract the feature values without gap-filling:
+ assay(res, "raw_nofill") |> head() ## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
assay(res, "raw_nofill") |> head()
## ko15.CDF ko16.CDF ko21.CDF ko22.CDF wt15.CDF wt16.CDF wt21.CDF ## FT001 NA 506848.9 NA 169955.6 NA NA NA @@ -1588,12 +1643,12 @@ Final result + metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+ metadata(res) ## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
metadata(res)
## [[1]] ## Object of class "XProcessHistory" ## type: Peak detection -## date: Thu Dec 7 08:46:45 2023 +## date: Fri Jan 26 20:10:29 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: CentWaveParam @@ -1602,7 +1657,7 @@ Final result## [[2]] ## Object of class "XProcessHistory" ## type: Peak refinement -## date: Thu Dec 7 08:46:48 2023 +## date: Fri Jan 26 20:10:32 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: MergeNeighboringPeaksParam @@ -1611,7 +1666,7 @@ Final result## [[3]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:46:59 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1620,7 +1675,7 @@ Final result## [[4]] ## Object of class "XProcessHistory" ## type: Retention time correction -## date: Thu Dec 7 08:47:00 2023 +## date: Fri Jan 26 20:10:46 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakGroupsParam @@ -1629,7 +1684,7 @@ Final result## [[5]] ## Object of class "XProcessHistory" ## type: Peak grouping -## date: Thu Dec 7 08:47:04 2023 +## date: Fri Jan 26 20:10:51 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: PeakDensityParam @@ -1638,7 +1693,7 @@ Final result## [[6]] ## Object of class "XProcessHistory" ## type: Missing peak filling -## date: Thu Dec 7 08:47:08 2023 +## date: Fri Jan 26 20:10:58 2024 ## info: ## fileIndex: 1,2,3,4,5,6,7,8 ## Parameter class: ChromPeakAreaParam @@ -1646,11 +1701,11 @@ Final result +
+