Skip to content

Commit

Permalink
Merge pull request #2379 from opencb/TASK-4158
Browse files Browse the repository at this point in the history
TASK-4158
  • Loading branch information
jtarraga authored Feb 7, 2024
2 parents 255a2df + 946755f commit e1385cf
Show file tree
Hide file tree
Showing 120 changed files with 1,940 additions and 843 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package org.opencb.opencga.analysis.clinical;

import org.apache.commons.lang3.StringUtils;
import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
import org.opencb.opencga.catalog.managers.FileManager;
import org.opencb.opencga.catalog.models.ClinicalAnalysisLoadResult;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.clinical.ClinicalAnalysisLoadParams;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.file.File;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;

@Tool(id = ClinicalAnalysisLoadTask.ID, resource = Enums.Resource.CLINICAL_ANALYSIS, description = ClinicalAnalysisLoadTask.DESCRIPTION)
public class ClinicalAnalysisLoadTask extends OpenCgaToolScopeStudy {
public final static String ID = "load";
public static final String DESCRIPTION = "Load clinical analyses from a file";

private Path filePath;

@ToolParams
protected ClinicalAnalysisLoadParams params = new ClinicalAnalysisLoadParams();

@Override
protected void check() throws Exception {
super.check();

String fileStr = params.getFile();
if (StringUtils.isEmpty(fileStr)) {
throw new ToolException("Missing input file when loading clinical analyses.");
}

File file = catalogManager.getFileManager().get(getStudy(), fileStr, FileManager.INCLUDE_FILE_URI_PATH, token).first();
filePath = Paths.get(file.getUri());
if (!filePath.toFile().exists()) {
throw new ToolException("Input file '" + fileStr + "' does not exist: " + filePath);
}
}

@Override
protected void run() throws Exception {
step(() -> {
ClinicalAnalysisLoadResult loadResult = catalogManager.getClinicalAnalysisManager().load(getStudy(), filePath, token);

// Add results as attributes
addAttribute("Num. clinical analyses loaded", loadResult.getNumLoaded());
addAttribute("Num. clinical analyses not loaded", loadResult.getFailures().size());
addAttribute("Loading time (in sec.)", loadResult.getTime());
addAttribute("Clinical analyses file name", loadResult.getFilename());

// Add warnings with the not loaded clinical analysis
if (loadResult.getFailures().size() > 0) {
for (Map.Entry<String, String> entry : loadResult.getFailures().entrySet()) {
addWarning("Clinical analysis " + entry.getKey() + " could not be loaded due to error: " + entry.getValue());
}
}
});
}
}

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,16 @@ public class ToolFactory {
private static Map<String, Set<Class<? extends OpenCgaTool>>> duplicatedTools;
private static List<Class<? extends OpenCgaTool>> toolsList;

private static synchronized Map<String, Class<? extends OpenCgaTool>> loadTools() {
public static final String DEFAULT_PACKAGE = "org.opencb.opencga";

private static synchronized Map<String, Class<? extends OpenCgaTool>> loadTools(List<String> packages) {
if (toolsCache == null) {
Reflections reflections = new Reflections(new ConfigurationBuilder()
.setScanners(
new SubTypesScanner(),
new TypeAnnotationsScanner().filterResultsBy(s -> StringUtils.equals(s, Tool.class.getName()))
)
.addUrls(getUrls())
.addUrls(getUrlsFromPackages(packages))
.filterInputsBy(input -> input != null && input.endsWith(".class"))
);

Expand Down Expand Up @@ -85,9 +87,20 @@ private static synchronized Map<String, Class<? extends OpenCgaTool>> loadTools(
}
return toolsCache;
}
/**
 * Collects the classpath URLs to scan for tools under the given packages.
 *
 * <p>A URL is kept when the last segment of its path is empty (the path ends with '/'), or when
 * that segment contains "opencga" but not "opencga-storage-hadoop-deps".
 *
 * @param packages package names passed to {@code ClasspathHelper.forPackage}
 * @return the URLs that passed the filter, in encounter order
 */
static Collection<URL> getUrlsFromPackages(List<String> packages) {
    Collection<URL> selected = new LinkedList<>();
    for (String packageName : packages) {
        for (URL candidate : ClasspathHelper.forPackage(packageName)) {
            String path = candidate.getPath();
            String lastSegment = path.substring(path.lastIndexOf('/') + 1);

            boolean endsWithSlash = lastSegment.isEmpty();
            boolean opencgaArtifact = lastSegment.contains("opencga")
                    && !lastSegment.contains("opencga-storage-hadoop-deps");
            if (endsWithSlash || opencgaArtifact) {
                selected.add(candidate);
            }
        }
    }
    return selected;
}

static Collection<URL> getUrls() {
// TODO: What if there are third party libraries that implement Tools?
// Currently they must contain "opencga" in the jar name.
// e.g. acme-rockets-opencga-5.4.0.jar
Collection<URL> urls = new LinkedList<>();
Expand All @@ -101,6 +114,10 @@ static Collection<URL> getUrls() {
}

/**
 * Resolves the tool class for the given id, searching only under {@link #DEFAULT_PACKAGE}.
 *
 * @param toolId tool identifier, forwarded to {@code getToolClass(String, List)}
 * @return the resolved tool class
 * @throws ToolException propagated from {@code getToolClass(String, List)}
 */
public final Class<? extends OpenCgaTool> getToolClass(String toolId) throws ToolException {
return getToolClass(toolId, Collections.singletonList(DEFAULT_PACKAGE));
}

public final Class<? extends OpenCgaTool> getToolClass(String toolId, List<String> packages) throws ToolException {
Objects.requireNonNull(toolId);

Class<? extends OpenCgaTool> aClass = null;
Expand All @@ -112,7 +129,7 @@ public final Class<? extends OpenCgaTool> getToolClass(String toolId) throws Too
} catch (ClassNotFoundException ignore) {
}
if (aClass == null) {
aClass = loadTools().get(toolId);
aClass = loadTools(packages).get(toolId);
}
if (aClass == null) {
throw new ToolException("Tool '" + toolId + "' not found");
Expand All @@ -121,11 +138,19 @@ public final Class<? extends OpenCgaTool> getToolClass(String toolId) throws Too
}

public Tool getTool(String toolId) throws ToolException {
return getToolClass(toolId).getAnnotation(Tool.class);
return getTool(toolId, Collections.singletonList(DEFAULT_PACKAGE));
}

/**
 * Returns the {@link Tool} annotation of the class resolved for the given tool id.
 *
 * @param toolId   tool identifier, forwarded to {@code getToolClass(String, List)}
 * @param packages packages forwarded to {@code getToolClass(String, List)}
 * @return the {@code Tool} annotation found on the resolved class
 * @throws ToolException propagated from {@code getToolClass(String, List)}
 */
public Tool getTool(String toolId, List<String> packages) throws ToolException {
return getToolClass(toolId, packages).getAnnotation(Tool.class);
}

public final OpenCgaTool createTool(String toolId) throws ToolException {
return createTool(getToolClass(toolId));
return createTool(toolId, Collections.singletonList(DEFAULT_PACKAGE));
}

/**
 * Resolves the tool class for the given id and packages, then creates the tool from that class.
 *
 * @param toolId   tool identifier, forwarded to {@code getToolClass(String, List)}
 * @param packages packages forwarded to {@code getToolClass(String, List)}
 * @return the tool created by {@code createTool(Class)}
 * @throws ToolException propagated from class resolution or tool creation
 */
public final OpenCgaTool createTool(String toolId, List<String> packages) throws ToolException {
return createTool(getToolClass(toolId, packages));
}

public final OpenCgaTool createTool(Class<? extends OpenCgaTool> aClass) throws ToolException {
Expand All @@ -141,12 +166,22 @@ public final OpenCgaTool createTool(Class<? extends OpenCgaTool> aClass) throws
}

public Collection<Class<? extends OpenCgaTool>> getTools() {
loadTools();
loadTools(Collections.singletonList(DEFAULT_PACKAGE));
return toolsList;
}

/**
 * Triggers tool loading for the given packages and returns the static list of tool classes.
 *
 * <p>NOTE(review): {@code loadTools} only scans while the static {@code toolsCache} is still null,
 * so {@code packages} looks to be ignored once an earlier call has populated the cache — confirm.
 *
 * @param packages packages forwarded to {@code loadTools}
 * @return the static {@code toolsList}
 */
public Collection<Class<? extends OpenCgaTool>> getTools(List<String> packages) {
// Called for its side effect: the return value is discarded and the static list is read instead
loadTools(packages);
return toolsList;
}

public Map<String, Set<Class<? extends OpenCgaTool>>> getDuplicatedTools() {
loadTools();
loadTools(Collections.singletonList(DEFAULT_PACKAGE));
return duplicatedTools;
}

/**
 * Triggers tool loading for the given packages and returns the static map of duplicated tools.
 *
 * <p>NOTE(review): {@code loadTools} only scans while the static {@code toolsCache} is still null,
 * so {@code packages} looks to be ignored once an earlier call has populated the cache — confirm.
 *
 * @param packages packages forwarded to {@code loadTools}
 * @return the static {@code duplicatedTools} map
 */
public Map<String, Set<Class<? extends OpenCgaTool>>> getDuplicatedTools(List<String> packages) {
// Called for its side effect: the return value is discarded and the static map is read instead
loadTools(packages);
return duplicatedTools;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@

package org.opencb.opencga.analysis.tools;

import org.apache.commons.collections4.CollectionUtils;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.variant.manager.VariantStorageManager;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.core.api.ParamConstants;
import org.opencb.opencga.core.config.Configuration;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.job.Job;
import org.opencb.opencga.core.tools.ToolParams;
Expand All @@ -32,6 +34,7 @@

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collections;

public class ToolRunner {

Expand All @@ -42,16 +45,30 @@ public class ToolRunner {
private final String opencgaHome;
private final ToolFactory toolFactory;

private final Configuration configuration;

/**
 * Creates a runner, building a new {@link VariantStorageManager} from the catalog manager and
 * the storage engine factory.
 *
 * @param opencgaHome          forwarded to the delegate constructor
 * @param catalogManager       catalog manager, also used to build the variant storage manager
 * @param storageEngineFactory factory used to build the variant storage manager
 */
public ToolRunner(String opencgaHome, CatalogManager catalogManager, StorageEngineFactory storageEngineFactory) {
this(opencgaHome, catalogManager, new VariantStorageManager(catalogManager, storageEngineFactory));
}

/**
 * Creates a runner with a configuration, building a new {@link VariantStorageManager} from the
 * catalog manager and the storage engine factory.
 *
 * @param opencgaHome          forwarded to the delegate constructor
 * @param catalogManager       catalog manager, also used to build the variant storage manager
 * @param storageEngineFactory factory used to build the variant storage manager
 * @param configuration        forwarded to the delegate constructor
 */
public ToolRunner(String opencgaHome, CatalogManager catalogManager, StorageEngineFactory storageEngineFactory,
Configuration configuration) {
this(opencgaHome, catalogManager, new VariantStorageManager(catalogManager, storageEngineFactory), configuration);
}

/**
 * Creates a runner without a configuration: delegates to the four-argument constructor with a
 * {@code null} configuration.
 *
 * @param opencgaHome           forwarded to the delegate constructor
 * @param catalogManager        forwarded to the delegate constructor
 * @param variantStorageManager forwarded to the delegate constructor
 */
public ToolRunner(String opencgaHome, CatalogManager catalogManager, VariantStorageManager variantStorageManager) {
this(opencgaHome, catalogManager, variantStorageManager, null);
}

/**
 * Creates a runner from already-built managers, storing each argument in its field and creating
 * a new {@link ToolFactory}.
 *
 * @param opencgaHome           stored as given
 * @param catalogManager        stored as given
 * @param variantStorageManager stored as given
 * @param configuration         stored as given; may be {@code null}, which is what the
 *                              three-argument constructor passes
 */
public ToolRunner(String opencgaHome, CatalogManager catalogManager, VariantStorageManager variantStorageManager,
Configuration configuration) {
this.opencgaHome = opencgaHome;
this.catalogManager = catalogManager;

this.variantStorageManager = variantStorageManager;
this.toolFactory = new ToolFactory();

this.configuration = configuration;
}

/**
Expand Down Expand Up @@ -102,9 +119,14 @@ public ExecutionResult execute(Job job, Path outDir, String token) throws Catalo
* @throws ToolException if the execution fails
*/
public ExecutionResult execute(String toolId, ObjectMap params, Path outDir, String jobId, String token) throws ToolException {
return toolFactory
.createTool(toolId)
.setUp(opencgaHome, catalogManager, variantStorageManager, params, outDir, jobId, token)
OpenCgaTool tool;
if (configuration != null && configuration.getAnalysis() != null
&& CollectionUtils.isNotEmpty(configuration.getAnalysis().getPackages())) {
tool = toolFactory.createTool(toolId, configuration.getAnalysis().getPackages());
} else {
tool = toolFactory.createTool(toolId);
}
return tool.setUp(opencgaHome, catalogManager, variantStorageManager, params, outDir, jobId, token)
.start();
}

Expand Down Expand Up @@ -177,6 +199,7 @@ public ExecutionResult execute(Class<? extends OpenCgaTool> tool, ToolParams too
* @throws ToolException if the execution fails
*/
public ExecutionResult execute(Class<? extends OpenCgaTool> tool, ObjectMap params, Path outDir, String jobId, String token) throws ToolException {

return toolFactory
.createTool(tool)
.setUp(opencgaHome, catalogManager, variantStorageManager, params, outDir, jobId, token)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.TestParamConstants;
import org.opencb.opencga.analysis.clinical.ClinicalAnalysisLoadTask;
import org.opencb.opencga.analysis.tools.ToolRunner;
import org.opencb.opencga.analysis.variant.gwas.GwasAnalysis;
import org.opencb.opencga.analysis.variant.hrdetect.HRDetectAnalysis;
Expand All @@ -62,12 +63,15 @@
import org.opencb.opencga.core.config.storage.CellBaseConfiguration;
import org.opencb.opencga.core.config.storage.StorageConfiguration;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.clinical.ClinicalAnalysis;
import org.opencb.opencga.core.models.clinical.ClinicalAnalysisLoadParams;
import org.opencb.opencga.core.models.cohort.Cohort;
import org.opencb.opencga.core.models.cohort.CohortCreateParams;
import org.opencb.opencga.core.models.cohort.CohortUpdateParams;
import org.opencb.opencga.core.models.common.AnnotationSet;
import org.opencb.opencga.core.models.family.Family;
import org.opencb.opencga.core.models.file.File;
import org.opencb.opencga.core.models.file.FileLinkParams;
import org.opencb.opencga.core.models.individual.Individual;
import org.opencb.opencga.core.models.individual.IndividualInternal;
import org.opencb.opencga.core.models.individual.Location;
Expand Down Expand Up @@ -1063,6 +1067,46 @@ public void testPedigreeGraph() throws CatalogException {
assertEquals(base64, family.getPedigreeGraph().getBase64());
}

/**
 * Links the bundled clinical analyses file into the cancer study, runs the load task on it, and
 * checks that both expected clinical analyses can be found afterwards.
 */
@Test
public void testClinicalAnalysisLoading() throws IOException, ToolException, CatalogException {
    // Link the test resource into the catalog
    String resourceName = "clinical_analyses.json.gz";
    String resourcePath = getClass().getResource("/biofiles/" + resourceName).getFile();
    FileLinkParams linkParams = new FileLinkParams(resourcePath, "ca", "", "", null, null, null, null, null);
    File linkedFile = catalogManager.getFileManager().link(CANCER_STUDY, linkParams, true, token).first();
    System.out.println("file ID = " + linkedFile.getId());
    System.out.println("file name = " + linkedFile.getName());

    // Run clinical analysis load task
    Path taskOutDir = Paths.get(opencga.createTmpOutdir("_clinical_analysis_outdir"));
    System.out.println("Clinical analysis load task out dir = " + taskOutDir);

    ClinicalAnalysisLoadParams loadParams = new ClinicalAnalysisLoadParams();
    loadParams.setFile(linkedFile.getId());
    ObjectMap studyParams = new ObjectMap(ParamConstants.STUDY_PARAM, CANCER_STUDY);
    toolRunner.execute(ClinicalAnalysisLoadTask.class, loadParams, studyParams, taskOutDir, null, token);

    String ca1Id = "SAP-45016-1";
    String ca2Id = "OPA-6607-1";

    // Both ids must show up in an unfiltered search
    Query query = new Query();
    OpenCGAResult<ClinicalAnalysis> everything = catalogManager.getClinicalAnalysisManager()
            .search(CANCER_STUDY, query, QueryOptions.empty(), token);
    Assert.assertTrue(everything.getResults().stream().map(ClinicalAnalysis::getId).anyMatch(ca1Id::equals));
    Assert.assertTrue(everything.getResults().stream().map(ClinicalAnalysis::getId).anyMatch(ca2Id::equals));

    // Each id must also be retrievable on its own; the same query object is reused, overwriting "id"
    for (String expectedId : new String[]{ca1Id, ca2Id}) {
        query.put("id", expectedId);
        ClinicalAnalysis found = catalogManager.getClinicalAnalysisManager()
                .search(CANCER_STUDY, query, QueryOptions.empty(), token)
                .first();
        Assert.assertEquals(expectedId, found.getId());
    }
}

@Test
public void testCellbaseConfigure() throws Exception {
String project = "Project_test_cellbase_configure";
Expand Down
2 changes: 2 additions & 0 deletions opencga-app/app/cloud/docker/compose/conf/configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ healthCheck:


analysis:
packages: # List of packages where to find analysis tools
- "org.opencb.opencga"
scratchDir: "" # Scratch folder for the analysis.
execution:
# Accepted values are "local", "SGE", "azure-batch", "k8s"
Expand Down
2 changes: 1 addition & 1 deletion opencga-app/app/misc/clients/r_client_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(self, server_url, output_dir):
'Analysis - Clinical': 'Clinical',
'Operations - Variant Storage': 'Operation',
'Meta': 'Meta',
'Cva': 'Cva',
'Cvdb': 'Cvdb',
'GA4GH': 'GA4GH',
'Admin': 'Admin'
}
Expand Down
2 changes: 1 addition & 1 deletion opencga-app/app/misc/clients/rest_client_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(self, rest_api_file, output_dir):
'Analysis - Clinical': 'ClinicalAnalysis',
'Operations - Variant Storage': 'VariantOperation',
'Meta': 'Meta',
'Cva': 'Cva',
'Cvdb': 'Cvdb',
'GA4GH': 'GA4GH',
'Admin': 'Admin'
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import static org.opencb.opencga.app.cli.internal.options.AlignmentCommandOptions.SamtoolsCommandOptions.SAMTOOLS_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.CancerTieringCommandOptions.CANCER_TIERING_INTERPRETATION_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.ExomiserInterpretationCommandOptions.EXOMISER_INTERPRETATION_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.ImportClinicalAnalysesCommandOptions.IMPORT_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.RgaAuxiliarSecondaryIndexCommandOptions.RGA_AUX_INDEX_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.RgaSecondaryIndexCommandOptions.RGA_INDEX_RUN_COMMAND;
import static org.opencb.opencga.app.cli.internal.options.ClinicalCommandOptions.TeamCommandOptions.TEAM_INTERPRETATION_RUN_COMMAND;
Expand Down Expand Up @@ -226,6 +227,7 @@ public InternalCliOptionsParser() {
clinicalSubCommands.addCommand(RGA_INDEX_RUN_COMMAND, clinicalCommandOptions.rgaSecondaryIndexCommandOptions);
clinicalSubCommands.addCommand(RGA_AUX_INDEX_RUN_COMMAND, clinicalCommandOptions.rgaAuxiliarSecondaryIndexCommandOptions);
clinicalSubCommands.addCommand(EXOMISER_INTERPRETATION_RUN_COMMAND, clinicalCommandOptions.exomiserInterpretationCommandOptions);
clinicalSubCommands.addCommand(IMPORT_COMMAND, clinicalCommandOptions.importClinicalAnalysesCommandOptions);
clinicalSubCommands.addCommand("tsv-load", clinicalCommandOptions.tsvLoad);

fileCommandOptions = new FileCommandOptions(commonCommandOptions, jCommander);
Expand Down
Loading

0 comments on commit e1385cf

Please sign in to comment.