diff --git a/eva-accession-pipeline/src/test/java/uk/ac/ebi/eva/accession/pipeline/configuration/batch/jobs/CreateSubsnpAccessionsRecoverStateTest.java b/eva-accession-pipeline/src/test/java/uk/ac/ebi/eva/accession/pipeline/configuration/batch/jobs/CreateSubsnpAccessionsRecoverStateTest.java new file mode 100644 index 000000000..06998cfe5 --- /dev/null +++ b/eva-accession-pipeline/src/test/java/uk/ac/ebi/eva/accession/pipeline/configuration/batch/jobs/CreateSubsnpAccessionsRecoverStateTest.java @@ -0,0 +1,259 @@ +/* + * Copyright 2019 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package uk.ac.ebi.eva.accession.pipeline.configuration.batch.jobs; + +import com.lordofthejars.nosqlunit.mongodb.MongoDbConfigurationBuilder; +import com.lordofthejars.nosqlunit.mongodb.MongoDbRule; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.batch.core.BatchStatus; +import org.springframework.batch.core.JobExecution; +import org.springframework.batch.core.StepExecution; +import org.springframework.batch.test.JobLauncherTestUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.ApplicationContext; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.http.HttpMethod; +import org.springframework.http.HttpStatus; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.TestPropertySource; +import org.springframework.test.context.junit4.SpringRunner; +import org.springframework.test.web.client.ExpectedCount; +import org.springframework.test.web.client.MockRestServiceServer; +import org.springframework.web.client.RestTemplate; +import uk.ac.ebi.ampt2d.commons.accession.persistence.jpa.monotonic.entities.ContiguousIdBlock; +import uk.ac.ebi.ampt2d.commons.accession.persistence.jpa.monotonic.repositories.ContiguousIdBlockRepository; +import uk.ac.ebi.eva.accession.core.configuration.nonhuman.SubmittedVariantAccessioningConfiguration; +import uk.ac.ebi.eva.accession.core.model.eva.SubmittedVariantEntity; +import uk.ac.ebi.eva.accession.core.repository.nonhuman.eva.SubmittedVariantAccessioningRepository; +import uk.ac.ebi.eva.accession.pipeline.batch.io.AccessionReportWriter; +import uk.ac.ebi.eva.accession.pipeline.parameters.InputParameters; +import uk.ac.ebi.eva.accession.pipeline.test.BatchTestConfiguration; +import uk.ac.ebi.eva.accession.pipeline.test.FixSpringMongoDbRule; +import uk.ac.ebi.eva.accession.pipeline.test.RecoverTestAccessioningConfiguration; +import uk.ac.ebi.eva.commons.core.utils.FileUtils; +import uk.ac.ebi.eva.metrics.count.CountServiceParameters; + +import java.io.FileInputStream; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Collection; +import java.util.Iterator; + +import static org.junit.Assert.assertEquals; +import static org.springframework.test.web.client.match.MockRestRequestMatchers.method; +import static org.springframework.test.web.client.match.MockRestRequestMatchers.requestTo; +import static org.springframework.test.web.client.response.MockRestResponseCreators.withStatus; +import static uk.ac.ebi.eva.accession.pipeline.configuration.BeanNames.BUILD_REPORT_STEP; +import static uk.ac.ebi.eva.accession.pipeline.configuration.BeanNames.CHECK_SUBSNP_ACCESSION_STEP; +import static uk.ac.ebi.eva.accession.pipeline.configuration.BeanNames.CREATE_SUBSNP_ACCESSION_STEP; + +@RunWith(SpringRunner.class) +@ContextConfiguration(classes = {RecoverTestAccessioningConfiguration.class, BatchTestConfiguration.class, + SubmittedVariantAccessioningConfiguration.class}) +@TestPropertySource("classpath:accession-pipeline-recover-state-test.properties") +public class CreateSubsnpAccessionsRecoverStateTest { + private static final String TEST_DB = "test-db"; + + @Autowired + private SubmittedVariantAccessioningRepository mongoRepository; + + @Autowired + private ContiguousIdBlockRepository blockRepository; + + @Autowired + private InputParameters inputParameters; + + @Autowired + private MongoTemplate mongoTemplate; + + //needed for @UsingDataSet + @Autowired + private ApplicationContext applicationContext; + + @Rule + public MongoDbRule mongoDbRule = new FixSpringMongoDbRule( + MongoDbConfigurationBuilder.mongoDb().databaseName(TEST_DB).build()); + + @Autowired + private JobLauncherTestUtils jobLauncherTestUtils; + + @Autowired + @Qualifier("COUNT_STATS_REST_TEMPLATE") + private RestTemplate restTemplate; + + private final String URL_PATH_SAVE_COUNT = "/v1/bulk/count"; + + @Autowired + private CountServiceParameters countServiceParameters; + + private static final int EXPECTED_VARIANTS_ACCESSIONED_FROM_VCF = 22; + + private MockRestServiceServer mockServer; + + @Before + public void setUp() throws Exception { + this.cleanSlate(); + mockServer = MockRestServiceServer.createServer(restTemplate); + mockServer.expect(ExpectedCount.manyTimes(), requestTo(new URI(countServiceParameters.getUrl() + URL_PATH_SAVE_COUNT))) + .andExpect(method(HttpMethod.POST)) + .andRespond(withStatus(HttpStatus.OK)); + } + + @After + public void tearDown() throws Exception { + this.cleanSlate(); + mongoTemplate.dropCollection(SubmittedVariantEntity.class); + } + + public void cleanSlate() throws Exception { + Files.deleteIfExists(Paths.get(inputParameters.getOutputVcf())); + Files.deleteIfExists(Paths.get(inputParameters.getOutputVcf() + AccessionReportWriter.VARIANTS_FILE_SUFFIX)); + Files.deleteIfExists(Paths.get(inputParameters.getOutputVcf() + AccessionReportWriter.CONTIGS_FILE_SUFFIX)); + Files.deleteIfExists(Paths.get(inputParameters.getFasta() + ".fai")); + } + + /** + * Note that for this test to work, we prepare the Mongo database in {@link RecoverTestAccessioningConfiguration}. + */ + @Test + public void accessionJobShouldRecoverUncommittedAccessions() throws Exception { + verifyInitialDBState(); + + runAccessioningJob(); + + verifyEndDBState(); + + assertCountsInVcfReport(EXPECTED_VARIANTS_ACCESSIONED_FROM_VCF); + assertCountsInMongo(EXPECTED_VARIANTS_ACCESSIONED_FROM_VCF + 85); + } + + /** + * We initialize DB for the test by inserting sves in {@link RecoverTestAccessioningConfiguration}. + */ + private void verifyInitialDBState() { + // Contiguous Id Block DB: + // Initial state of DB is 4 blocks are "reserved" but not "committed" in postgresql + // block id first value last value last committed + // 1 5000000000 5000000029 4999999999 + // 2 5000000030 5000000059 5000000029 + // 3 5000000060 5000000089 5000000059 + // 4 5000000090 5000000119 5000000089 + + // Mongo DB + // 85 accessions have been used in mongoDB but are not reflected in the block allocation table + // 30 accessions belong to 1st block (5000000000 to 5000000029), + // 25 to the 2nd block (5000000030 to 500000034 and 5000000040 to 5000000059) + // 30 to the 3rd block (5000000060 to 5000000089) + // None in 4th block + assertEquals(85, mongoRepository.count()); // 30 + 25 + 30 + assertEquals(4, blockRepository.count()); + + //recover state run in constructor of the MonotonicAccessionGenerator, + // so will run when objects are being created even before job starts and will try to recover the blocks + // The below stage of DB is after the recover state has already ran + + // 1st block is recovered and last committed updated to 5000000029 as all are present in mongo + ContiguousIdBlock block1 = blockRepository.findById(1l).get(); + assertEquals(5000000000l, block1.getFirstValue()); + assertEquals(5000000029l, block1.getLastCommitted()); + assertEquals(5000000029l, block1.getLastValue()); + + // 2nd block's committed is updated 5000000034 as there are available accessions after that + ContiguousIdBlock block2 = blockRepository.findById(2l).get(); + assertEquals(5000000030l, block2.getFirstValue()); + assertEquals(5000000034l, block2.getLastCommitted()); + assertEquals(5000000059l, block2.getLastValue()); + + // 3rd block is not updated even though it is full (all accessions of this block are present in mongo) + // the current algorithm takes the uncompleted blocks in ascending order of last value + // and stops updating blocks as soon as it finds a block which is not full + // In our case 1st is picked and updated, + // 2nd is picked and updated but is not full + // the algorithm stops at this point and did not bother to check the 3rd block + ContiguousIdBlock block3 = blockRepository.findById(3l).get(); + assertEquals(5000000060l, block3.getFirstValue()); + assertEquals(5000000059l, block3.getLastCommitted()); + assertEquals(5000000089l, block3.getLastValue()); + + ContiguousIdBlock block4 = blockRepository.findById(4l).get(); + assertEquals(5000000090l, block4.getFirstValue()); + assertEquals(5000000089l, block4.getLastCommitted()); + assertEquals(5000000119l, block4.getLastValue()); + } + + private void verifyEndDBState() { + // VCF has 22 variants that needs to be accessioned + + assertEquals(107, mongoRepository.count()); // 85 (already present) + 22 (accessioned) + assertEquals(4, blockRepository.count()); + + ContiguousIdBlock block1 = blockRepository.findById(1l).get(); + assertEquals(5000000000l, block1.getFirstValue()); + assertEquals(5000000029l, block1.getLastCommitted()); + assertEquals(5000000029l, block1.getLastValue()); + + // used the 5 unused accessions 5000000030 to 5000000034 + ContiguousIdBlock block2 = blockRepository.findById(2l).get(); + assertEquals(5000000030l, block2.getFirstValue()); + assertEquals(5000000059l, block2.getLastCommitted()); + assertEquals(5000000059l, block2.getLastValue()); + + // Now that the 2nd block is full and committed, 3rd blocks also get's picked up and its last committed updated + ContiguousIdBlock block3 = blockRepository.findById(3l).get(); + assertEquals(5000000060l, block3.getFirstValue()); + assertEquals(5000000089l, block3.getLastCommitted()); + assertEquals(5000000089l, block3.getLastValue()); + + // used the remaining 17 (22 - 5 (2nd block)) from 4th block + ContiguousIdBlock block4 = blockRepository.findById(4l).get(); + assertEquals(5000000090l, block4.getFirstValue()); + assertEquals(5000000106l, block4.getLastCommitted()); + assertEquals(5000000119l, block4.getLastValue()); + } + + private void runAccessioningJob() throws Exception { + JobExecution jobExecution = jobLauncherTestUtils.launchJob(); + assertEquals(BatchStatus.COMPLETED, jobExecution.getStatus()); + assertStepNames(jobExecution.getStepExecutions()); + } + + private void assertStepNames(Collection stepExecutions) { + assertEquals(3, stepExecutions.size()); + Iterator iterator = stepExecutions.iterator(); + assertEquals(CREATE_SUBSNP_ACCESSION_STEP, iterator.next().getStepName()); + assertEquals(BUILD_REPORT_STEP, iterator.next().getStepName()); + assertEquals(CHECK_SUBSNP_ACCESSION_STEP, iterator.next().getStepName()); + } + + private void assertCountsInMongo(int expected) { + long numVariantsInMongo = mongoRepository.count(); + assertEquals(expected, numVariantsInMongo); + } + + private void assertCountsInVcfReport(int expected) throws IOException { + long numVariantsInReport = FileUtils.countNonCommentLines(new FileInputStream(inputParameters.getOutputVcf())); + assertEquals(expected, numVariantsInReport); + } + +} diff --git a/eva-accession-pipeline/src/test/java/uk/ac/ebi/eva/accession/pipeline/configuration/batch/jobs/CreateSubsnpAccessionsRecoveringStateJobConfigurationTest.java b/eva-accession-pipeline/src/test/java/uk/ac/ebi/eva/accession/pipeline/configuration/batch/jobs/CreateSubsnpAccessionsRecoveringStateJobConfigurationTest.java index 0a52c7619..111185e93 100644 --- a/eva-accession-pipeline/src/test/java/uk/ac/ebi/eva/accession/pipeline/configuration/batch/jobs/CreateSubsnpAccessionsRecoveringStateJobConfigurationTest.java +++ b/eva-accession-pipeline/src/test/java/uk/ac/ebi/eva/accession/pipeline/configuration/batch/jobs/CreateSubsnpAccessionsRecoveringStateJobConfigurationTest.java @@ -140,7 +140,7 @@ private void startWithAnAccessionInMongoNotCommittedInTheBlockService() { assertEquals(1, blockRepository.count()); // This means that the last committed accession is the previous one to the UNCOMMITTED_ACCESSION - assertEquals(UNCOMMITTED_ACCESSION - 1, blockRepository.findAll().iterator().next().getLastCommitted()); + assertEquals(UNCOMMITTED_ACCESSION, blockRepository.findAll().iterator().next().getLastCommitted()); } private void runJob() throws Exception { diff --git a/eva-accession-pipeline/src/test/java/uk/ac/ebi/eva/accession/pipeline/test/RecoverTestAccessioningConfiguration.java b/eva-accession-pipeline/src/test/java/uk/ac/ebi/eva/accession/pipeline/test/RecoverTestAccessioningConfiguration.java new file mode 100644 index 000000000..3903b4f12 --- /dev/null +++ b/eva-accession-pipeline/src/test/java/uk/ac/ebi/eva/accession/pipeline/test/RecoverTestAccessioningConfiguration.java @@ -0,0 +1,120 @@ +/* + * Copyright 2019 EMBL - European Bioinformatics Institute + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package uk.ac.ebi.eva.accession.pipeline.test; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import uk.ac.ebi.ampt2d.commons.accession.generators.monotonic.MonotonicAccessionGenerator; +import uk.ac.ebi.ampt2d.commons.accession.hashing.SHA1HashingFunction; +import uk.ac.ebi.ampt2d.commons.accession.persistence.jpa.monotonic.service.ContiguousIdBlockService; + +import uk.ac.ebi.eva.accession.core.model.ISubmittedVariant; +import uk.ac.ebi.eva.accession.core.model.SubmittedVariant; +import uk.ac.ebi.eva.accession.core.configuration.ApplicationProperties; +import uk.ac.ebi.eva.accession.core.service.nonhuman.eva.SubmittedVariantAccessioningDatabaseService; +import uk.ac.ebi.eva.accession.core.repository.nonhuman.eva.SubmittedVariantAccessioningRepository; +import uk.ac.ebi.eva.accession.core.model.eva.SubmittedVariantEntity; +import uk.ac.ebi.eva.accession.core.service.nonhuman.eva.SubmittedVariantMonotonicAccessioningService; +import uk.ac.ebi.eva.accession.core.summary.SubmittedVariantSummaryFunction; + +import java.util.ArrayList; +import java.util.List; + +/** + * This configuration class has the single purpose of having loaded in MongoDB an object *before* the + * MonotonicAccessionGenerator is instantiated (and autowired in the accessioning service and pipeline jobs) so that + * the generator can recover from uncommitted accessions. + * + * An uncommitted accession is an accession that is present in MongoDB but wasn't committed in the block service (e.g. + * due to an unexpected crash of the application in previous executions). If the block service doesn't recover, this + * might lead to a single accession being assigned to several different objects in mongo. + */ +@Configuration +public class RecoverTestAccessioningConfiguration { + + private static final Logger logger = LoggerFactory.getLogger(RecoveringAccessioningConfiguration.class); + + @Bean + public SubmittedVariantMonotonicAccessioningService submittedVariantMonotonicAccessioningService( + @Autowired @Qualifier("testSubmittedVariantAccessionGeneratorRecover") + MonotonicAccessionGenerator accessionGenerator, + @Autowired SubmittedVariantAccessioningDatabaseService databaseService) { + return new SubmittedVariantMonotonicAccessioningService(accessionGenerator, + databaseService, + new SubmittedVariantSummaryFunction(), + new SHA1HashingFunction()); + } + + @Bean("testSubmittedVariantAccessionGeneratorRecover") + public MonotonicAccessionGenerator testSubmittedVariantAccessionGeneratorRecover( + @Autowired SubmittedVariantAccessioningRepository repository, + @Autowired SubmittedVariantAccessioningDatabaseService databaseService, + @Autowired ApplicationProperties properties, + @Autowired ContiguousIdBlockService blockService) { + + repository.deleteAll(); + + List submittedVariantEntityList = new ArrayList<>(); + // Entries for 1st block + for(long i=5000000000l;i<=5000000029l;i++){ + SubmittedVariant model = new SubmittedVariant("assembly", 1111, + "project", "contig", 100, "A", "T", + null, false, false, false, + false, null); + SubmittedVariantEntity entity = new SubmittedVariantEntity(i, "hash"+i, model, 1); + submittedVariantEntityList.add(entity); + } + + // Entries for 2nd block + for(long i=5000000030l;i<=5000000034l;i++){ + SubmittedVariant model = new SubmittedVariant("assembly", 1111, + "project", "contig", 100, "A", "T", + null, false, false, false, + false, null); + SubmittedVariantEntity entity = new SubmittedVariantEntity(i, "hash"+i, model, 1); + submittedVariantEntityList.add(entity); + } + for(long i=5000000040l;i<=5000000059l;i++){ + SubmittedVariant model = new SubmittedVariant("assembly", 1111, + "project", "contig", 100, "A", "T", + null, false, false, false, + false, null); + SubmittedVariantEntity entity = new SubmittedVariantEntity(i, "hash"+i, model, 1); + submittedVariantEntityList.add(entity); + } + + // Entries for 3rd block + for(long i=5000000060l;i<=5000000089l;i++){ + SubmittedVariant model = new SubmittedVariant("assembly", 1111, + "project", "contig", 100, "A", "T", + null, false, false, false, + false, null); + SubmittedVariantEntity entity = new SubmittedVariantEntity(i, "hash"+i, model, 1); + submittedVariantEntityList.add(entity); + } + + repository.saveAll(submittedVariantEntityList); + + return new MonotonicAccessionGenerator<>(properties.getSubmitted().getCategoryId(), + properties.getInstanceId(), + blockService, + databaseService); + } +} diff --git a/eva-accession-pipeline/src/test/resources/properties/accession-pipeline-recover-state-test.properties b/eva-accession-pipeline/src/test/resources/properties/accession-pipeline-recover-state-test.properties new file mode 100644 index 000000000..f807bf537 --- /dev/null +++ b/eva-accession-pipeline/src/test/resources/properties/accession-pipeline-recover-state-test.properties @@ -0,0 +1,46 @@ +accessioning.instanceId=test-instance-recover-state-01 +accessioning.submitted.categoryId=test-pipeline-recover-state-ss + +accessioning.monotonic.test-pipeline-recover-state-ss.blockSize=30 +accessioning.monotonic.test-pipeline-recover-state-ss.blockStartValue=5000000000 +accessioning.monotonic.test-pipeline-recover-state-ss.nextBlockInterval=1000000000 + +spring.datasource.driver-class-name=org.hsqldb.jdbcDriver +spring.datasource.url=jdbc:hsqldb:mem:db;sql.syntax_pgs=true;DB_CLOSE_DELAY=-1 +spring.datasource.username=SA +spring.datasource.password= +spring.datasource.schema=test-data/contiguous_id_blocks_schema.sql +spring.datasource.data=test-data/contiguous_id_blocks_recover_state_data.sql +spring.jpa.hibernate.ddl-auto=update + +parameters.assemblyAccession=assembly +parameters.taxonomyAccession=1111 +parameters.projectAccession=project +parameters.chunkSize=100 +parameters.vcf=src/test/resources/input-files/vcf/small_genotyped.vcf.gz +parameters.vcfAggregation=NONE + +parameters.fasta=src/test/resources/input-files/fasta/Homo_sapiens.GRCh37.75.chr20.head_1200.fa +parameters.outputVcf=/tmp/accession-output.vcf +parameters.assemblyReportUrl=file:src/test/resources/input-files/assembly-report/assembly_report.txt +parameters.contigNaming=SEQUENCE_NAME + +eva.count-stats.url=http://localhost:8080 +eva.count-stats.username=username +eva.count-stats.password=password + +spring.jpa.show-sql=true + +spring.data.mongodb.uri=mongodb://|eva.mongo.host.test|:27017 +spring.data.mongodb.database=test-db +spring.data.mongodb.host=|eva.mongo.host.test| +spring.data.mongodb.password= +spring.data.mongodb.port=27017 +mongodb.read-preference=primary + +# See https://github.com/spring-projects/spring-boot/wiki/Spring-Boot-2.1-Release-Notes#bean-overriding +spring.main.allow-bean-definition-overriding=true + +# to fix exception javax.management.InstanceAlreadyExistsException: com.zaxxer.hikari:name=dataSource,type=HikariDataSource +# see https://stackoverflow.com/a/51798043/2375586 +spring.jmx.enabled=false \ No newline at end of file diff --git a/eva-accession-pipeline/src/test/resources/test-data/contiguous_id_blocks_recover_state_data.sql b/eva-accession-pipeline/src/test/resources/test-data/contiguous_id_blocks_recover_state_data.sql new file mode 100644 index 000000000..5e3e7fe57 --- /dev/null +++ b/eva-accession-pipeline/src/test/resources/test-data/contiguous_id_blocks_recover_state_data.sql @@ -0,0 +1,4 @@ +INSERT INTO contiguous_id_blocks VALUES(1, 'test-instance-recover-state-01', 'test-pipeline-recover-state-ss', 5000000000, 4999999999, 5000000029); +INSERT INTO contiguous_id_blocks VALUES(2, 'test-instance-recover-state-01', 'test-pipeline-recover-state-ss', 5000000030, 5000000029, 5000000059); +INSERT INTO contiguous_id_blocks VALUES(3, 'test-instance-recover-state-01', 'test-pipeline-recover-state-ss', 5000000060, 5000000059, 5000000089); +INSERT INTO contiguous_id_blocks VALUES(4, 'test-instance-recover-state-01', 'test-pipeline-recover-state-ss', 5000000090, 5000000089, 5000000119); \ No newline at end of file diff --git a/pom.xml b/pom.xml index 2cabaa281..d77db8336 100644 --- a/pom.xml +++ b/pom.xml @@ -28,7 +28,7 @@ UTF-8 4.13 0.8.5 - 0.7.9 + 0.7.10