Skip to content

Commit

Permalink
Merge pull request #2352 from opencb/TASK-2254
Browse files Browse the repository at this point in the history
TASK-2254 - Error processing job output - malformed VCF and disk quota exceeded
  • Loading branch information
pfurio authored Oct 20, 2023
2 parents d11cf4f + a2a2fa5 commit e037913
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3712,16 +3712,23 @@ OpenCGAResult<File> registerFile(Study study, String filePath, URI fileUri, Stri

File.Type type = filePath.endsWith("/") ? File.Type.DIRECTORY : File.Type.FILE;

File subfile = new File(Paths.get(filePath).getFileName().toString(), type, File.Format.UNKNOWN,
File.Bioformat.NONE, fileUri, filePath, "", TimeUtils.getTime(), TimeUtils.getTime(),
"", isExternal(study, filePath, fileUri), size, new Software(), new FileExperiment(), Collections.emptyList(),
Collections.emptyList(), jobId, studyManager.getCurrentRelease(study), Collections.emptyList(), Collections.emptyList(),
new FileQualityControl(), Collections.emptyMap(), new Status(), FileInternal.init(), Collections.emptyMap());
File.Format format = org.opencb.opencga.catalog.managers.FileUtils.detectFormat(fileUri);
File.Bioformat bioformat = org.opencb.opencga.catalog.managers.FileUtils.detectBioformat(fileUri);
File subfile = new File(Paths.get(filePath).getFileName().toString(), type, format, bioformat, fileUri, filePath, "",
TimeUtils.getTime(), TimeUtils.getTime(), "", isExternal(study, filePath, fileUri), size, new Software(),
new FileExperiment(), Collections.emptyList(), Collections.emptyList(), jobId, studyManager.getCurrentRelease(study),
Collections.emptyList(), Collections.emptyList(), new FileQualityControl(), Collections.emptyMap(), new Status(),
FileInternal.init(), Collections.emptyMap());
subfile.setUuid(UuidUtils.generateOpenCgaUuid(UuidUtils.Entity.FILE));
checkHooks(subfile, study.getFqn(), HookConfiguration.Stage.CREATE);

// Improve metadata information and extract samples if any
new FileMetadataReader(catalogManager).addMetadataInformation(study.getFqn(), subfile);
try {
new FileMetadataReader(catalogManager).addMetadataInformation(study.getFqn(), subfile);
} catch (CatalogException e) {
subfile.getInternal().setStatus(new FileStatus(FileStatus.ERROR, "Could not extract metadata information: " + e.getMessage()));
logger.warn("Could not extract metadata information from file {}: {}", fileUri, e.getMessage(), e);
}
List<Sample> existingSamples = new LinkedList<>();
List<Sample> nonExistingSamples = new LinkedList<>();
validateNewSamples(study, subfile, existingSamples, nonExistingSamples, token);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ private FileUpdateParams extractMetadataInformation(String studyId, File file) t
VariantFileMetadata fileMetadata;
try {
fileMetadata = readVariantFileMetadata(file, file.getUri());
} catch (IOException e) {
} catch (Exception e) {
throw new CatalogIOException("Unable to read VariantSource", e);
}
if (fileMetadata != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@ public class FileStatus extends InternalStatus {
public static final String DELETING = "DELETING"; // This status is set exactly before deleting the file from disk.
public static final String REMOVED = "REMOVED";
public static final String MISSING_SAMPLES = "MISSING_SAMPLES";
public static final String ERROR = "ERROR";

public static final List<String> STATUS_LIST = Arrays.asList(READY, DELETED, TRASHED, STAGE, MISSING, PENDING_DELETE, DELETING,
REMOVED, MISSING_SAMPLES);
REMOVED, MISSING_SAMPLES, ERROR);

public FileStatus(String status, String message) {
if (isValid(status)) {
Expand All @@ -59,7 +60,7 @@ public static boolean isValid(String status) {
}
if (status != null && (status.equals(STAGE) || status.equals(MISSING) || status.equals(TRASHED)
|| status.equals(PENDING_DELETE) || status.equals(DELETING) || status.equals(REMOVED)
|| status.equals(MISSING_SAMPLES))) {
|| status.equals(MISSING_SAMPLES) || status.equals(ERROR))) {
return true;
}
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -986,7 +986,7 @@ private int processFinishedJob(Job job, Enums.ExecutionStatus status) {
registeredFiles = fileManager.syncUntrackedFiles(job.getStudy().getId(), job.getOutDir().getPath(), uriPredicate, job.getId(),
token).getResults();
} catch (CatalogException e) {
logger.error("Could not registered files in Catalog: {}", e.getMessage(), e);
logger.error("Could not register files in Catalog: {}", e.getMessage(), e);
return 0;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.AclParams;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.file.File;
import org.opencb.opencga.core.models.file.FileContent;
import org.opencb.opencga.core.models.file.FileStatus;
import org.opencb.opencga.core.models.job.Job;
import org.opencb.opencga.core.models.job.JobInternal;
import org.opencb.opencga.core.models.job.JobInternalWebhook;
Expand All @@ -52,6 +54,7 @@
import org.opencb.opencga.master.monitor.models.PrivateJobUpdateParams;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
Expand Down Expand Up @@ -538,6 +541,34 @@ public void testRunJobFailMissingExecutionResult() throws Exception {
assertEquals("Job could not finish successfully. Missing execution result", getJob(jobId).getInternal().getStatus().getDescription());
}

@Test
public void registerMalformedVcfFromExecutedJobTest() throws CatalogException {
HashMap<String, Object> params = new HashMap<>();
params.put(ExecutionDaemon.OUTDIR_PARAM, "outputDir/");
Job job = catalogManager.getJobManager().submit(studyFqn, "variant-index", Enums.Priority.MEDIUM, params, token).first();
String jobId = job.getId();

daemon.checkJobs();
job = catalogManager.getJobManager().get(studyFqn, jobId, QueryOptions.empty(), token).first();
executor.jobStatus.put(jobId, Enums.ExecutionStatus.READY);
try {
// Create an empty VCF file (this will fail because OpenCGA will not be able to parse it)
Path vcffile = Paths.get(job.getOutDir().getUri()).resolve("myemptyvcf.vcf");
Files.createFile(vcffile);
} catch (IOException e) {
throw new RuntimeException(e);
}

daemon.checkJobs();

// Check the file has been properly registered
job = catalogManager.getJobManager().get(studyFqn, jobId, QueryOptions.empty(), token).first();
assertEquals(1, job.getOutput().size());
assertEquals("myemptyvcf.vcf", job.getOutput().get(0).getName());
assertEquals(File.Format.VCF, job.getOutput().get(0).getFormat());
assertEquals(FileStatus.ERROR, job.getOutput().get(0).getInternal().getStatus().getId());
}

private void checkStatus(Job job, String status) {
assertEquals(status, job.getInternal().getStatus().getId());
assertEquals(status, job.getInternal().getStatus().getName());
Expand Down

0 comments on commit e037913

Please sign in to comment.