Skip to content

Commit

Permalink
tools: RvTests tests, #125
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Jan 23, 2017
1 parent 721f303 commit 0bc165e
Show file tree
Hide file tree
Showing 7 changed files with 515 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ public LocalCliOptionsParser() {
variantSubCommands.addCommand("view", variantCommandOptions.viewVariantCommandOptions);
variantSubCommands.addCommand("query", variantCommandOptions.queryVariantCommandOptions);
variantSubCommands.addCommand("metadata", variantCommandOptions.metadataVariantCommandOptions);
variantSubCommands.addCommand("rvtests", variantCommandOptions.rvtestsVariantCommandOptions);
}

public void parse(String[] args) throws ParameterException {
Expand Down Expand Up @@ -439,13 +440,15 @@ public class VariantCommandOptions extends CommandOptions {
ViewVariantCommandOptions viewVariantCommandOptions;
QueryVariantCommandOptions queryVariantCommandOptions;
MetadataVariantCommandOptions metadataVariantCommandOptions;
RvTestsVariantCommandOptions rvtestsVariantCommandOptions;

/**
 * Creates one options holder per 'variant' subcommand
 * (convert, annotate, view, query, metadata and rvtests).
 */
public VariantCommandOptions() {
    convertVariantCommandOptions = new ConvertVariantCommandOptions();
    annotateVariantCommandOptions = new AnnotateVariantCommandOptions();
    viewVariantCommandOptions = new ViewVariantCommandOptions();
    queryVariantCommandOptions = new QueryVariantCommandOptions();
    metadataVariantCommandOptions = new MetadataVariantCommandOptions();
    rvtestsVariantCommandOptions = new RvTestsVariantCommandOptions();
}
}

Expand Down Expand Up @@ -740,6 +743,30 @@ class MetadataVariantCommandOptions {
public boolean summary = false;
}

/**
 * Command line options of the 'variant rvtests' subcommand.
 *
 * Input file, output directory and configuration file are mandatory; the dataset is optional.
 */
@Parameters(commandNames = {"rvtests"}, commandDescription = "Execute the 'rvtests' program.")
class RvTestsVariantCommandOptions {

// Reuses the enclosing parser's commonCommandOptions instance instead of creating a new one.
@ParametersDelegate
public CommonCommandOptions commonOptions = commonCommandOptions;

// Mandatory: input file name (Avro/Parquet, according to the option description).
@Parameter(names = {"-i", "--input"}, description = "Input file name (in Avro/Parquet file format).",
required = true, arity = 1)
public String inFilename;

// Mandatory: directory where the rvtests results are saved.
@Parameter(names = {"-o", "--output"}, description = "Output directory name to save the rvtests results.",
required = true, arity = 1)
public String outDirname;

// Optional: target dataset; stays null when the option is not given.
@Parameter(names = {"--dataset"}, description = "Target dataset.",
arity = 1)
public String datasetId = null;

// Mandatory: file holding the rvtests parameters.
@Parameter(names = {"-c", "--config"}, description = "Configuration file name containing the rvtests parameters.",
required = true, arity = 1)
public String confFilename;
}


private void printMainUsage() {
// TODO This is a nasty hack. For some unknown reason JCommander only prints the description from first command
Map<String, String> commandDescription = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.opencb.hpg.bigdata.core.lib.SparkConfCreator;
import org.opencb.hpg.bigdata.core.lib.VariantDataset;
import org.opencb.hpg.bigdata.core.parquet.VariantParquetConverter;
import org.opencb.hpg.bigdata.tools.variant.analysis.RvTestsAnalysis;

import java.io.*;
import java.nio.file.Files;
Expand Down Expand Up @@ -96,11 +97,17 @@ public void execute() throws Exception {
query();
break;
case "metadata":
init(variantCommandOptions.queryVariantCommandOptions.commonOptions.logLevel,
variantCommandOptions.queryVariantCommandOptions.commonOptions.verbose,
variantCommandOptions.queryVariantCommandOptions.commonOptions.conf);
init(variantCommandOptions.metadataVariantCommandOptions.commonOptions.logLevel,
variantCommandOptions.metadataVariantCommandOptions.commonOptions.verbose,
variantCommandOptions.metadataVariantCommandOptions.commonOptions.conf);
metadata();
break;
case "rvtests":
init(variantCommandOptions.rvtestsVariantCommandOptions.commonOptions.logLevel,
variantCommandOptions.rvtestsVariantCommandOptions.commonOptions.verbose,
variantCommandOptions.rvtestsVariantCommandOptions.commonOptions.conf);
rvtests();
break;
default:
break;
}
Expand Down Expand Up @@ -842,4 +849,13 @@ public void metadata() throws Exception {
System.out.println("Error: metafile does not exist, " + metaFile.getAbsolutePath());
}
}


/**
 * Runs the rvtests analysis using the input file, output directory, configuration file
 * and dataset given in the 'rvtests' command line options.
 *
 * @throws Exception if the analysis fails
 */
public void rvtests() throws Exception {
    String input = variantCommandOptions.rvtestsVariantCommandOptions.inFilename;
    String outputDir = variantCommandOptions.rvtestsVariantCommandOptions.outDirname;
    String config = variantCommandOptions.rvtestsVariantCommandOptions.confFilename;

    RvTestsAnalysis analysis = new RvTestsAnalysis(input, outputDir, config);
    analysis.run(variantCommandOptions.rvtestsVariantCommandOptions.datasetId);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package org.opencb.hpg.bigdata.app.cli.local;

import org.junit.Test;

import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
* Created by joaquin on 1/19/17.
*/
public class VariantRvTestsCLITest {
Path inPath;
Path outPath;
Path confPath;

private void init() throws URISyntaxException {
inPath = Paths.get("/home/jtarraga/data/vcf/skat/example.vcf.avro");
outPath = Paths.get("/home/jtarraga/data/vcf/skat/out");
confPath = Paths.get("/home/jtarraga/data/vcf/skat/skat.params");
}

@Test
public void skat() {

try {
init();

StringBuilder commandLine = new StringBuilder();
commandLine.append(" variant rvtests");
commandLine.append(" --log-level ERROR");
commandLine.append(" -i ").append(inPath);
commandLine.append(" -o ").append(outPath);
commandLine.append(" -c ").append(confPath);
commandLine.append(" --dataset noname");

VariantQueryCLITest.execute(commandLine.toString());
} catch (Exception e) {
e.printStackTrace();
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.spark.sql.types.StructType;
import org.apache.spark.storage.StorageLevel;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.avro.VariantAvro;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import scala.Symbol;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
package org.opencb.hpg.bigdata.tools.variant.analysis;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
import org.opencb.biodata.formats.pedigree.PedigreeManager;
import org.opencb.biodata.models.core.Region;
import org.opencb.biodata.models.variant.VariantMetadataManager;
import org.opencb.biodata.models.variant.avro.VariantAvro;
import org.opencb.commons.utils.FileUtils;
import org.opencb.hpg.bigdata.core.lib.SparkConfCreator;
import org.opencb.hpg.bigdata.core.lib.VariantDataset;

import java.io.*;
import java.nio.file.Paths;
import java.util.List;
import java.util.Properties;

/**
* Created by joaquin on 1/19/17.
*/
public class RvTestsAnalysis {
private String inFilename;
private String outDirname;
private String confFilename;

private final String RVTEST_BIN = "/home/jtarraga/softs/rvtests/executable/rvtest";
private final String BGZIP_BIN = "/home/joaquin/softs/htslib/bgzip";
private final String TABIX_BIN = "/home/joaquin/softs/htslib/tabix";

public RvTestsAnalysis(String inFilename, String outDirname, String confFilename) {
this.inFilename = inFilename;
this.outDirname = outDirname;
this.confFilename = confFilename;
}

// ./build/bin/hpg-bigdata-local2.sh variant rvtests -i ~/data/vcf/skat/example.vcf.avro -o ~/data/vcf/skat/out --dataset noname -c ~/data/vcf/skat/skat.params

public void run(String dataset) throws Exception {
// create spark session
SparkConf sparkConf = SparkConfCreator.getConf("variant rvtests", "local", 1, true);
SparkSession sparkSession = new SparkSession(new SparkContext(sparkConf));

// load dataset
VariantDataset vd = new VariantDataset(sparkSession);
vd.load(inFilename);
vd.createOrReplaceTempView("vcf");

// load rvtests parameters
Properties prop = new Properties();
InputStream confStream = new FileInputStream(confFilename);
prop.load(confStream);
confStream.close();

for (Object key: prop.keySet()) {
System.out.println((String) key + " = " + (String) prop.get(key));
}

// create temporary directory
File tmpDir = new File(outDirname + "/tmp");
tmpDir.mkdir();

// create temporary file for --pheno
File phenoFile = new File(tmpDir.getAbsolutePath() + "/pheno");
VariantMetadataManager metadataManager = new VariantMetadataManager();
metadataManager.load(inFilename + ".meta.json");
new PedigreeManager().save(metadataManager.getPedigree(dataset), phenoFile.toPath());

// loop for regions
String line;
BufferedReader reader = FileUtils.newBufferedReader(Paths.get(prop.getProperty("setFile")));
int i = 0;
StringBuilder cmdline = new StringBuilder();
while ((line = reader.readLine()) != null) {
String[] fields = line.split("[\t ]");
System.out.println(fields[0]);
String regionName = fields[0];
Region region = new Region(fields[1]);

// create temporary files for --inVcf and --setFile
File setFile = new File(tmpDir.getAbsolutePath() + "/setFile." + i);
BufferedWriter writer = FileUtils.newBufferedWriter(setFile.toPath());
writer.write(fields[0] + "\t" + fields[1] + "\n");
writer.close();

// create temporary vcf file fot the region variants
VariantDataset ds = (VariantDataset) vd.regionFilter(region);
Dataset<VariantAvro> variantDS = ds.as(Encoders.bean(VariantAvro.class));

List<Row> rows = ds.collectAsList();
for (Row row: rows) {
row.g
System.out.println(row);
}
File vcfFile = new File(tmpDir.getAbsolutePath() + "/variants." + i + ".vcf");

// compress vcf to bgz
cmdline.setLength(0);
cmdline.append(this.BGZIP_BIN).append(" ").append(vcfFile.getAbsolutePath());
execute(cmdline.toString());

// and create tabix index
cmdline.setLength(0);
cmdline.append(this.TABIX_BIN).append(" -p vcf ").append(vcfFile.getAbsolutePath()).append(".gz");
execute(cmdline.toString());

// rvtests command line
cmdline.setLength(0);
cmdline.append(this.RVTEST_BIN).append(" --kernel skat --pheno ").append(phenoFile.getAbsolutePath())
.append(" --inVcf ").append(vcfFile.getAbsolutePath()).append(".gz")
.append(" --setFile ").append(setFile.getAbsolutePath())
.append(" --out ").append(tmpDir.getAbsolutePath()).append("/out.").append(i);
execute(cmdline.toString());

i++;
}
reader.close();
}


private void execute(String cmdline) {
try {
System.out.println("Executing: " + cmdline);
Process p = Runtime.getRuntime().exec(cmdline);

BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream()));

// read the output from the command
String s;
System.out.println("Here is the standard output of the command:\n");
while ((s = stdInput.readLine()) != null) {
System.out.println(s);
}

// read any errors from the attempted command
System.out.println("Here is the standard error of the command (if any):\n");
while ((s = stdError.readLine()) != null) {
System.out.println(s);
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,5 @@ public void test() {
trainingSummary.residuals().show();
System.out.println("RMSE: " + trainingSummary.rootMeanSquaredError());
System.out.println("r2: " + trainingSummary.r2());

}
}
Loading

0 comments on commit 0bc165e

Please sign in to comment.