-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinternal_standards_evaluation.Rmd
149 lines (118 loc) · 4.41 KB
/
internal_standards_evaluation.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
---
title: "Evaluate internal standards"
author: "Mar Garcia-Aloy"
output:
BiocStyle::html_document:
toc: true
number_sections: false
toc_float: true
---
# Parameters
```{r}
polarity <- "POS" # specify "POS" or "NEG"
#' Define the path where we can find the mzML files:
MZML_PATH <- "C:/Users/mgarciaaloy/Documents/mzML/"
```
# Preliminaries
```{r, message=FALSE}
source("R/listpeaks.R")
library(xcms)
library(reshape2)
library(ggplot2)
library(Rdisop)
library(CompoundDb)
library(beeswarm)
library(RColorBrewer)
```
# Data import
In the file `internal_standards_files.txt` there is the information regarding
the injection sequence.
```{r}
injections <- read.table("data/internal_standards_files.txt", # import the file
sep = "\t", header = TRUE, as.is = TRUE)
myfiles <- injections$mzML # get file names
myfiles <- myfiles[grep(polarity, myfiles)]# select files names of our polarity
```
In the file `internal_standards.txt` there is the information regarding
the standards that are in the samples.
```{r}
std_info <- read.table("data/internal_standards.txt",
sep = "\t", header = TRUE, as.is = TRUE)
std_info <- std_info[,1:5]
std_info <- std_info[!is.na(std_info[, grep(polarity, colnames(std_info))]),]
std_info$mzneut = NA
for(i in seq(nrow(std_info))){
if(grepl("C", std_info$formula[i])){std_info$mzneut[i] =
getMolecule(as.character(std_info$formula[i]))$exactmass}else{
std_info$mzneut[i] = as.numeric(std_info$formula[i])}
}
head(std_info)
```
# Peak detection
Apply the `listpeaks()` function in order to detect the peaks of ISs:
```{r, warning=FALSE}
mzvalues <- c()
for(i in seq(nrow(std_info))){
mzvalues <- c(mzvalues,
unlist(mass2mz(std_info$mzneut[i],
adduct =
as.character(
std_info[i,
grep(polarity, colnames(std_info))]
))))
}
rtvalues <- std_info$RT
peaktb <- listpeaks(file = paste0(MZML_PATH, myfiles),
mz = mzvalues,
param = CentWaveParam(peakwidth = c(2, 10),
ppm = 30))
peaktb$name <- std_info$name[peaktb$row]
# calculate the difference between "rtmed" and the expected retention time:
peaktb$rt_delta <- abs(std_info$RT[peaktb$row] - peaktb$rt)
# subset the peak matrix to rows (peaks) for which this difference
# (the absolute value of the difference) is smaller than 60 seconds:
peaktb <- subset(peaktb, rt_delta < 60)
# among the subsetted peaks, choose the one with the highest intensity:
pl <- split(peaktb, paste(peaktb$row, peaktb$column))
pl <- lapply(pl, function(z){
z[which.max(z$into),]
})
peaktb <- do.call(rbind, pl)
peaktb_wide <- dcast(peaktb, peaktb$name ~ peaktb$colum, value.var = "into")
peaktb_wide <- setNames(data.frame(t(peaktb_wide[,-1])), peaktb_wide[,1])
peaktb_wide[is.na(peaktb_wide)] <- min(peaktb_wide, na.rm=T)/1000
```
# Plots
## Individual compounds
```{r}
getPalette = colorRampPalette(brewer.pal(12, "Paired"))
mycols = getPalette(nrow(std_info))
myplots = list()
for(i in seq(ncol(peaktb_wide))){
tb <- data.frame(sample = seq(length(myfiles)),
intensity = peaktb_wide[,i] /
mean(peaktb_wide[peaktb_wide[,i] != 0, i]))
myplots[[i]] <-
ggplot(tb, aes(sample, log2(intensity))) +
geom_point(color=mycols[i], cex=3) + geom_line() + theme_classic() +
ggtitle(colnames(peaktb_wide)[i]) +
xlab("") + ylab("Intensity") + ylim(-1,1)
}
ggsave(paste0("internal_standard_evaluation_relative_", polarity, ".pdf"),
gridExtra::marrangeGrob(grobs = myplots, top="",
layout_matrix =
rbind(c(1,2,3,4), c(5,6,7,8), c(9,10,11,12),
c(13,14,15,16), c(17,18,19,20))),
width=21, height=15)
```
## All together
```{r}
peaktb_rel <- apply(peaktb_wide, 2, function(x){x/(mean(x[x!=0]))})
peaktb_rel <- melt(peaktb_rel, id.vars = row.names(peaktb_rel))
boxplot(log2(peaktb_rel$value) ~ peaktb_rel$Var1,
pch = 16, xlab = "sample", ylab= "log2(relative intensity)" ,
outline = FALSE, main=paste0("IS - ", polarity))
beeswarm(log2(peaktb_rel$value) ~ peaktb_rel$Var1,
pwcol = mycols[peaktb_rel$Var2],
pch = 16, xlab = "", ylab= "", add = TRUE)
```