Skip to content

Commit

Permalink
analysis: major improvements in liftover analysis, #TASK-7064, #TASK-…
Browse files Browse the repository at this point in the history
…7049
  • Loading branch information
jtarraga committed Oct 5, 2024
1 parent 1548b7c commit 94c8f6a
Show file tree
Hide file tree
Showing 14 changed files with 529 additions and 125 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,15 @@
import org.apache.commons.lang3.tuple.Pair;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.exec.Command;
import org.opencb.commons.utils.FileUtils;
import org.opencb.opencga.analysis.wrappers.deeptools.DeeptoolsWrapperAnalysis;
import org.opencb.opencga.core.common.GitRepositoryState;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.tools.OpenCgaToolExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.DataOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
Expand Down Expand Up @@ -157,6 +155,52 @@ protected void appendOtherParams(Set<String> skipParams, StringBuilder sb) {
}
}

/**
 * Builds a {@code docker run --rm} command line for the given image.
 * <p>
 * The command is assembled in this order: docker parameters, user (unless a "user" docker parameter was
 * supplied), input bind mounts, output bind mount, image and, finally, the image command parameters.
 *
 * @param image          Docker image (and version) to run
 * @param inputBindings  Input bind mounts as (host source, container target) pairs; may be null
 * @param outputBinding  Output bind mount as a (host source, container target) pair; mandatory. When no "user"
 *                       docker parameter is given, its host source is also used to resolve the user and group
 * @param cmdParams      Command and parameters to execute inside the image; nothing is appended when null
 * @param dockerParams   Extra {@code docker run} options, each one rendered as {@code --key value}; a null value
 *                       renders the option as a bare flag ({@code --key}); may be null
 * @return The docker command line
 * @throws IOException              Declared by the signature; presumably raised while resolving the user and group
 *                                  of the output folder (to be confirmed against FileUtils.getUserAndGroup)
 * @throws IllegalArgumentException If the output binding is missing
 */
protected String buildCommandLine(String image, List<AbstractMap.SimpleEntry<String, String>> inputBindings,
                                  AbstractMap.SimpleEntry<String, String> outputBinding, String cmdParams,
                                  Map<String, String> dockerParams) throws IOException {
    // Sanity check
    if (outputBinding == null) {
        throw new IllegalArgumentException("Missing output binding");
    }

    // Docker run
    StringBuilder commandLine = new StringBuilder("docker run --rm ");

    // Docker params
    boolean setUser = true;
    if (dockerParams != null) {
        // An explicit "user" docker parameter takes precedence over the user taken from the output folder
        setUser = !dockerParams.containsKey("user");
        for (Map.Entry<String, String> dockerParam : dockerParams.entrySet()) {
            commandLine.append("--").append(dockerParam.getKey()).append(" ");
            // A null value would otherwise be rendered as the literal text "null"
            if (dockerParam.getValue() != null) {
                commandLine.append(dockerParam.getValue()).append(" ");
            }
        }
    }

    if (setUser) {
        // User: array of two strings, the first string, the user; the second, the group
        String[] user = FileUtils.getUserAndGroup(Paths.get(outputBinding.getKey()), true);
        commandLine.append("--user ").append(user[0]).append(":").append(user[1]).append(" ");
    }

    // Mount management (bindings): inputs first, then the mandatory output
    if (inputBindings != null) {
        for (AbstractMap.SimpleEntry<String, String> binding : inputBindings) {
            commandLine.append("--mount type=bind,source=\"").append(binding.getKey()).append("\",target=\"")
                    .append(binding.getValue()).append("\" ");
        }
    }
    commandLine.append("--mount type=bind,source=\"").append(outputBinding.getKey()).append("\",target=\"")
            .append(outputBinding.getValue()).append("\" ");

    // Docker image and version
    commandLine.append(image).append(" ");

    // Image command params (skipped when null, so the text "null" is never passed to the container)
    if (cmdParams != null) {
        commandLine.append(cmdParams);
    }
    return commandLine.toString();
}

protected void runCommandLine(String cmdline) throws ToolException {
checkDockerDaemonAlive();
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,59 +19,146 @@

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.variant.LiftoverWrapperParams;
import org.opencb.opencga.core.tools.ResourceManager;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.*;

import static org.opencb.opencga.core.api.FieldConstants.*;

@Tool(id = LiftoverWrapperAnalysis.ID, resource = Enums.Resource.VARIANT, description = LiftoverWrapperAnalysis.DESCRIPTION)
public class LiftoverWrapperAnalysis extends OpenCgaToolScopeStudy {

// Tool identifier; it is also used as the name of the liftover step and as a resource key
public static final String ID = "liftover";
public static final String DESCRIPTION = "BCFtools liftover plugin maps coordinates from assembly 37 to 38.";

// Name of the folder, created inside the job directory, where the liftover resources are copied
public static final String RESOURCES_FOLDER = "resources";
private static final String PREPARE_RESOURCES_STEP = "prepare-resources";

// Input files resolved from the catalog by check()
private final List<File> files = new ArrayList<>();
// Destination of the resulting VCF files, set by check()
private String vcfDest;
// Folder holding the copied liftover resources, set by prepareResources()
private Path resourcePath;

@ToolParams
protected final LiftoverWrapperParams analysisParams = new LiftoverWrapperParams();

protected void check() throws Exception {
// IMPORTANT: the first thing to do since it initializes "study" from params.get(STUDY_PARAM)
super.check();

setUpStorageEngineExecutor(study);

if (CollectionUtils.isEmpty(analysisParams.getFiles())) {
throw new ToolException("Liftover 'file' parameter is mandatory.");
throw new ToolException("Liftover 'files' parameter is mandatory.");
}

// Check files
org.opencb.opencga.core.models.file.File opencgaFile;
for (String file : analysisParams.getFiles()) {
opencgaFile = getCatalogManager().getFileManager().get(study, file, QueryOptions.empty(), token).first();
files.add(Paths.get(opencgaFile.getUri().getPath()).toFile());
}

// Check target assembly
if (StringUtils.isEmpty(analysisParams.getTargetAssembly())) {
throw new ToolException("Liftover 'targetDirectory' parameter is mandatory, valid options are 'GRCh38' and 'hg38'.");
throw new ToolException("Liftover 'targetAssembly' parameter is mandatory, valid options are '" + LIFTOVER_GRCH38 + "' and '"
+ LIFTOVER_HG38 + "'.");
}

if (!LIFTOVER_GRCH38.equals(analysisParams.getTargetAssembly()) && !LIFTOVER_HG38.equals(analysisParams.getTargetAssembly())) {
throw new ToolException("Unknown Liftover 'targetAssembly' parameter ('" + analysisParams.getTargetAssembly()
+ "') , valid options are '" + LIFTOVER_GRCH38 + "' and '" + LIFTOVER_HG38 + "'.");
}

if (StringUtils.isEmpty(analysisParams.getVcfOutdir())) {
// String file = analysisParams.getFiles();
// if (file.contains("/")) {
// // Set output directory to the parent directory of the input file
// analysisParams.setOutdir(file.substring(0, file.lastIndexOf('/')));
// } else {
// // Set output directory to the study root directory
// analysisParams.setOutdir("");
// }
// Check destination
vcfDest = analysisParams.getVcfDestination();
if (StringUtils.isEmpty(vcfDest)) {
logger.info("Liftover 'vcfDestination' parameter is empty, the resultant VCF files will be stored in the job directory: {}",
getOutDir());
} else if (!LIFTOVER_VCF_INPUT_FOLDER.equals(vcfDest)) {
opencgaFile = getCatalogManager().getFileManager().get(study, analysisParams.getVcfDestination(), QueryOptions.empty(), token)
.first();
vcfDest = Paths.get(opencgaFile.getUri().getPath()).toAbsolutePath().toString();
if (!Files.exists(Paths.get(vcfDest))) {
throw new ToolException("Liftover 'vcfDestination' parameter (" + analysisParams.getVcfDestination() + ") with folder ("
+ vcfDest + ") does not exist");
}
}
}

@Override
protected void run() throws Exception {
// setUpStorageEngineExecutor(study);

step(() -> {
executorParams.append("opencgaHome", getOpencgaHome().toString());
executorParams.append("study", study);
executorParams.append("files", analysisParams.getFiles());
executorParams.append("targetAssembly", analysisParams.getTargetAssembly());
executorParams.append("vcfOutdir", analysisParams.getVcfOutdir());
executorParams.append("outdir", analysisParams.getOutdir());

getToolExecutor(LiftoverWrapperAnalysisExecutor.class)
.execute();
});
protected List<String> getSteps() {
return Arrays.asList(PREPARE_RESOURCES_STEP, ID);
}

/**
 * Runs the tool as two consecutive steps: resource preparation followed by the liftover execution.
 *
 * @throws ToolException If any of the steps fails
 */
protected void run() throws ToolException {
    // First step: get the liftover resource files into the job dir
    step(PREPARE_RESOURCES_STEP, () -> prepareResources());

    // Second step: execute the liftover script
    step(ID, () -> runLiftover());

    // Open question: should the liftover resource folder be cleaned afterwards?
}


/**
 * Prepares the liftover resources inside the job directory.
 * <p>
 * Creates the RESOURCES_FOLDER folder under the job directory, selects the chain file and the two reference
 * genomes that match the target assembly, obtains each of them through the ResourceManager and copies them into
 * that folder. Files already present in the folder are overwritten, so the step can be run again on the same
 * job directory.
 *
 * @throws IOException   If the resource folder cannot be created or a resource file cannot be copied
 * @throws ToolException If the target assembly is not one of the supported values
 */
protected void prepareResources() throws IOException, ToolException {
    // Create folder where the liftover resources will be saved (within the job dir, aka outdir)
    resourcePath = Files.createDirectories(getOutDir().resolve(RESOURCES_FOLDER));

    // Identify Liftover resources to download only the required ones
    Map<String, List<String>> mapResources = new HashMap<>();
    switch (analysisParams.getTargetAssembly()) {
        case LIFTOVER_GRCH38: {
            mapResources.put(ID, Collections.singletonList("GRCh37_to_GRCh38.chain.gz"));
            mapResources.put("reference-genome", Arrays.asList("Homo_sapiens.GRCh37.dna.primary_assembly.fa.gz",
                    "Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"));
            break;
        }
        case LIFTOVER_HG38: {
            mapResources.put(ID, Collections.singletonList("hg19ToHg38.over.chain.gz"));
            mapResources.put("reference-genome", Arrays.asList("hg19.fa.gz", "hg38.fa.gz"));
            break;
        }
        default: {
            throw new ToolException("Unknown Liftover 'targetAssembly' parameter ('" + analysisParams.getTargetAssembly()
                    + "') , valid options are '" + LIFTOVER_GRCH38 + "' and '" + LIFTOVER_HG38 + "'.");
        }
    }

    // Download resources and copy them to the job dir
    // (this URL is temporary, it should be replaced by the resourceUrl from configuration file)
    ResourceManager resourceManager = new ResourceManager(getOpencgaHome(), "http://resources.opencb.org/task-6766/");
    for (Map.Entry<String, List<String>> entry : mapResources.entrySet()) {
        for (String resourceName : entry.getValue()) {
            File resourceFile = resourceManager.getResourceFile(entry.getKey(), resourceName);
            // REPLACE_EXISTING: without it the copy fails with FileAlreadyExistsException when the target is already there
            Files.copy(resourceFile.toPath(), resourcePath.resolve(resourceFile.getName()),
                    java.nio.file.StandardCopyOption.REPLACE_EXISTING);
        }
    }
}

/**
 * Configures the liftover executor with the values resolved by the previous phases and executes it.
 *
 * @throws Exception Any exception raised while getting or executing the tool executor
 */
protected void runLiftover() throws Exception {
    LiftoverWrapperAnalysisExecutor liftoverExecutor = getToolExecutor(LiftoverWrapperAnalysisExecutor.class);

    // Liftover folder: <opencga home>/analysis/<tool id>
    Path liftoverPath = getOpencgaHome().resolve("analysis").resolve(ID);
    String targetAssembly = analysisParams.getTargetAssembly();

    // Configure the executor and launch it
    liftoverExecutor.setStudy(study)
            .setLiftoverPath(liftoverPath)
            .setFiles(files)
            .setTargetAssembly(targetAssembly)
            .setVcfDest(vcfDest)
            .setResourcePath(resourcePath)
            .execute();
}
}
Loading

0 comments on commit 94c8f6a

Please sign in to comment.