-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathPericardium.R
44 lines (36 loc) · 1.67 KB
/
Pericardium.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# R script to download selected samples
# Copy code and run on a local machine to initiate download
# Check for dependencies and install if missing
packages <- c("rhdf5")
if (length(setdiff(packages, rownames(installed.packages()))) > 0) {
print("Install required packages")
source("https://bioconductor.org/biocLite.R")
biocLite("rhdf5")
}
library("rhdf5")
library("tools")
destination_file = "human_matrix_download.h5"
extracted_expression_file = "Pericardium_expression_matrix.tsv"
url = "https://s3.amazonaws.com/mssm-seq-matrix/human_matrix.h5"
# Check if gene expression file was already downloaded and check integrity, if not in current directory download file form repository
if(!file.exists(destination_file)){
print("Downloading compressed gene expression matrix.")
download.file(url, destination_file, quiet = FALSE)
}
# Selected samples to be extracted
samp = c("GSM2343609","GSM2343610","")
# Retrieve information from compressed data
samples = h5read(destination_file, "meta/Sample_geo_accession")
tissue = h5read(destination_file, "meta/Sample_source_name_ch1")
genes = h5read(destination_file, "meta/genes")
title = h5read(destination_file, "meta/Sample_title")
# Identify columns to be extracted
sample_locations = which(samples %in% samp)
# extract gene expression from compressed data
expression = h5read(destination_file, "data/expression", index=list(1:length(genes), sample_locations))
H5close()
rownames(expression) = genes
colnames(expression) = samples[sample_locations]
# Print file
write.table(expression, file=extracted_expression_file, sep="\t", quote=FALSE)
print(paste0("Expression file was created at ", getwd(), "/", extracted_expression_file))