Skip to content

Commit

Permalink
storage: Fix compoundHet query in single-study projects. #TASK-6311
Browse files Browse the repository at this point in the history
  • Loading branch information
j-coll committed Jun 20, 2024
1 parent 63dec4c commit b6e6e86
Show file tree
Hide file tree
Showing 22 changed files with 177 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,6 @@ private void rebuildSampleFileIds(VariantStorageMetadataManager metadataManager,
for (Map.Entry<Integer, List<Integer>> entry : batch.entrySet()) {
Integer sampleId = entry.getKey();
List<Integer> fileIds = entry.getValue();

List<Integer> actualFiles = metadataManager.getSampleMetadata(studyId, sampleId).getFiles();
if (actualFiles.size() != fileIds.size() || !actualFiles.containsAll(fileIds)) {
fixedSamples++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,19 @@ public Iterator<FileMetadata> fileMetadataIterator(int studyId) {
return fileDBAdaptor.fileIterator(studyId);
}

/**
 * Boxed-argument convenience overload.
 * <p>
 * Both identifiers are unboxed explicitly (study first, then sample) so the call is routed to the
 * primitive {@code (int, int)} overload. A {@code null} argument fails with a
 * {@link NullPointerException} during unboxing.
 *
 * @param studyId  boxed study identifier, must not be null
 * @param sampleId boxed sample identifier, must not be null
 * @return whatever the {@code (int, int)} overload returns for these ids
 */
public SampleMetadata getSampleMetadata(Integer studyId, Integer sampleId) {
    final int study = studyId.intValue();
    final int sample = sampleId.intValue();
    return getSampleMetadata(study, sample);
}

/**
 * Convenience overload for a primitive study id combined with a boxed sample id.
 * <p>
 * The sample id is unboxed explicitly so that the call resolves to the primitive
 * {@code (int, int)} overload instead of the {@code (int, Object)} one.
 *
 * @param studyId  study identifier
 * @param sampleId boxed sample identifier, must not be null
 * @return whatever the {@code (int, int)} overload returns for these ids
 */
public SampleMetadata getSampleMetadata(int studyId, Integer sampleId) {
    final int sample = sampleId.intValue();
    return getSampleMetadata(studyId, sample);
}

/**
 * Fetches the metadata of a sample given as a generic reference.
 * <p>
 * The reference is first translated into a numeric sample id through
 * {@code getSampleIdOrFail(studyId, sample)}, and that id is then passed to the
 * {@code (int, int)} overload.
 *
 * @param studyId study identifier
 * @param sample  sample reference accepted by {@code getSampleIdOrFail}
 * @return whatever the {@code (int, int)} overload returns for the resolved id
 */
public SampleMetadata getSampleMetadata(int studyId, Object sample) {
    final int resolvedSampleId = getSampleIdOrFail(studyId, sample);
    return getSampleMetadata(studyId, resolvedSampleId);
}

/**
 * Reads the metadata of a sample from the sample DB adaptor.
 * <p>
 * The other {@code getSampleMetadata} overloads shown alongside it end up delegating here.
 * The third argument of the adaptor call is always passed as {@code null}.
 *
 * @param studyId  study identifier
 * @param sampleId sample identifier
 * @return the value returned by {@code sampleDBAdaptor.getSampleMetadata(studyId, sampleId, null)}
 */
public SampleMetadata getSampleMetadata(int studyId, int sampleId) {
    final SampleMetadata metadata = sampleDBAdaptor.getSampleMetadata(studyId, sampleId, null);
    return metadata;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1348,7 +1348,7 @@ public VariantQueryExecutor getVariantQueryExecutor(Query query, QueryOptions op
public VariantQueryExecutor getVariantQueryExecutor(ParsedVariantQuery variantQuery) {
try {
for (VariantQueryExecutor executor : getVariantQueryExecutors()) {
if (executor.canUseThisExecutor(variantQuery.getQuery(), variantQuery.getInputOptions())) {
if (executor.canUseThisExecutor(variantQuery, variantQuery.getInputOptions())) {
logger.info("Using VariantQueryExecutor : " + executor.getClass().getName());
logger.info(" Query : " + VariantQueryUtils.printQuery(variantQuery.getInputQuery()));
logger.info(" Options : " + variantQuery.getInputOptions().toJson());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,14 +267,6 @@ public VariantStudyQuery setStudies(ParsedQuery<String> studies) {
return this;
}

/**
 * Returns the single study of this query.
 *
 * @return the first (and only) element of {@code studies}
 * @throws VariantQueryException if {@code studies} is null or does not contain exactly one element
 */
public String getStudyOrFail() {
    final boolean exactlyOneStudy = studies != null && studies.size() == 1;
    if (exactlyOneStudy) {
        return studies.get(0);
    }
    throw new VariantQueryException("Require exactly one study");
}

/**
 * Accessor for the {@code genotypes} field.
 *
 * @return the current value of {@code genotypes}, returned as-is (no copy is made)
 */
public ParsedQuery<KeyOpValue<SampleMetadata, List<String>>> getGenotypes() {
    return this.genotypes;
}
Expand Down Expand Up @@ -311,6 +303,19 @@ public void setDefaultStudy(StudyMetadata defaultStudy) {
/**
 * Accessor for the {@code defaultStudy} field.
 *
 * @return the current value of {@code defaultStudy}; no null check is performed here
 */
public StudyMetadata getDefaultStudy() {
    return this.defaultStudy;
}

/**
 * Returns the default study of this query, failing when there is none.
 *
 * @return the non-null default study
 * @throws VariantQueryException if no default study is set. The message reports how many studies were
 *                               selected when there is more than one, and otherwise states that a
 *                               study is required.
 */
public StudyMetadata getDefaultStudyOrFail() {
    if (defaultStudy != null) {
        return defaultStudy;
    }
    // "studies" may be unset (getStudyOrFail guards against that too); count it as an empty
    // selection so callers get a VariantQueryException instead of a NullPointerException.
    final int numStudies = studies == null ? 0 : studies.size();
    if (numStudies > 1) {
        // Only a multi-study selection is reported as "too many". An empty selection must not be
        // described as "Found 0 studies".
        throw new VariantQueryException("Only one study is allowed. Found " + numStudies + " studies");
    }
    throw new VariantQueryException("One study required. None provided");
}

}

public static class VariantQueryXref {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@
import org.opencb.commons.datastore.core.QueryParam;
import org.opencb.opencga.core.models.variant.VariantAnnotationConstants;
import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager;
import org.opencb.opencga.storage.core.metadata.models.SampleMetadata;
import org.opencb.opencga.storage.core.metadata.models.StudyMetadata;
import org.opencb.opencga.storage.core.metadata.models.TaskMetadata;
import org.opencb.opencga.storage.core.metadata.models.VariantScoreMetadata;
import org.opencb.opencga.storage.core.metadata.models.*;
import org.opencb.opencga.storage.core.utils.CellBaseUtils;
import org.opencb.opencga.storage.core.variant.VariantStorageOptions;
import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass;
Expand Down Expand Up @@ -170,15 +167,14 @@ public ParsedVariantQuery parseQuery(Query inputQuery, QueryOptions options, boo
VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.key(),
VariantStorageOptions.APPROXIMATE_COUNT_SAMPLING_SIZE.defaultValue()));

variantQuery.setProjection(projectionParser.parseVariantQueryProjection(inputQuery, options));

VariantQuery query;
if (!skipPreProcess) {
query = new VariantQuery(preProcessQuery(inputQuery, options, variantQuery.getProjection()));
query = new VariantQuery(preProcessQuery(inputQuery, options));
} else {
query = new VariantQuery(inputQuery);
}
variantQuery.setQuery(query);
variantQuery.setProjection(projectionParser.parseVariantQueryProjection(query, options));

List<Region> geneRegions = Region.parseRegions(query.getString(ANNOT_GENE_REGIONS.key()));
variantQuery.setGeneRegions(geneRegions == null ? Collections.emptyList() : geneRegions);
Expand Down Expand Up @@ -221,28 +217,21 @@ public ParsedVariantQuery parseQuery(Query inputQuery, QueryOptions options, boo
= new ParsedQuery<>(sampleDataQuery.getKey(), sampleDataQuery.getOperation(), new ArrayList<>(sampleDataQuery.size()));
for (KeyValues<String, KeyOpValue<String, String>> keyValues : sampleDataQuery) {
sampleDataQueryWithMetadata.getValues().add(
keyValues.mapKey(sample -> {
int sampleId = metadataManager.getSampleIdOrFail(defaultStudy.getId(), sample);
return metadataManager.getSampleMetadata(defaultStudy.getId(), sampleId);
}));
keyValues.mapKey(sample -> metadataManager.getSampleMetadata(defaultStudy.getId(), sample)));
}
studyQuery.setSampleDataQuery(sampleDataQueryWithMetadata);
}

return variantQuery;
}

/**
 * Pre-processes a query without a precomputed projection.
 * <p>
 * Delegates to the three-argument overload, passing {@code null} as the projection.
 *
 * @param originalQuery query to pre-process
 * @param options       query options
 * @return the result of the three-argument {@code preProcessQuery}
 */
public final Query preProcessQuery(Query originalQuery, QueryOptions options) {
    final VariantQueryProjection noProjection = null;
    return preProcessQuery(originalQuery, options, noProjection);
}

protected Query preProcessQuery(Query originalQuery, QueryOptions options, VariantQueryProjection projection) {
public Query preProcessQuery(Query originalQuery, QueryOptions options) {
// Copy input query! Do not modify original query!
Query query = VariantQueryUtils.copy(originalQuery);

preProcessAnnotationParams(query);

preProcessStudyParams(query, options, projection);
preProcessStudyParams(query, options);

if (options != null && options.getLong(QueryOptions.LIMIT) < 0) {
throw VariantQueryException.malformedParam(QueryOptions.LIMIT, options.getString(QueryOptions.LIMIT),
Expand Down Expand Up @@ -388,7 +377,7 @@ private VariantType parseVariantType(String type) {
}
}

protected void preProcessStudyParams(Query query, QueryOptions options, VariantQueryProjection projection) {
protected void preProcessStudyParams(Query query, QueryOptions options) {
StudyMetadata defaultStudy = getDefaultStudy(query);
QueryOperation formatOperator = null;
if (isValidParam(query, SAMPLE_DATA)) {
Expand Down Expand Up @@ -613,17 +602,21 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ

if (isValidParam(query, SAMPLE_MENDELIAN_ERROR)
|| isValidParam(query, SAMPLE_DE_NOVO)
|| isValidParam(query, SAMPLE_DE_NOVO_STRICT)) {
|| isValidParam(query, SAMPLE_DE_NOVO_STRICT)
|| isValidParam(query, SAMPLE_COMPOUND_HETEROZYGOUS)) {
boolean requireMendelianReady = false;
QueryParam param = null;
if (isValidParam(query, SAMPLE_MENDELIAN_ERROR)) {
param = SAMPLE_MENDELIAN_ERROR;
requireMendelianReady = true;
}
if (isValidParam(query, SAMPLE_DE_NOVO)) {
if (param != null) {
throw VariantQueryException.unsupportedParamsCombination(
param, query.getString(param.key()),
SAMPLE_DE_NOVO, query.getString(SAMPLE_DE_NOVO.key()));
}
requireMendelianReady = true;
param = SAMPLE_DE_NOVO;
}
if (isValidParam(query, SAMPLE_DE_NOVO_STRICT)) {
Expand All @@ -632,8 +625,21 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ
param, query.getString(param.key()),
SAMPLE_DE_NOVO_STRICT, query.getString(SAMPLE_DE_NOVO_STRICT.key()));
}
requireMendelianReady = true;
param = SAMPLE_DE_NOVO_STRICT;
}
if (isValidParam(query, SAMPLE_COMPOUND_HETEROZYGOUS)) {
if (param != null) {
throw VariantQueryException.unsupportedParamsCombination(
param, query.getString(param.key()),
SAMPLE_COMPOUND_HETEROZYGOUS, query.getString(SAMPLE_COMPOUND_HETEROZYGOUS.key()));
}
requireMendelianReady = false;
param = SAMPLE_COMPOUND_HETEROZYGOUS;
}
if (param == null) {
throw new IllegalStateException("Unknown param");
}
if (defaultStudy == null) {
throw VariantQueryException.missingStudyForSamples(query.getAsStringList(param.key()),
metadataManager.getStudyNames());
Expand All @@ -645,15 +651,18 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ
genotypeParam, query.getString(genotypeParam.key())
);
}
List<String> samples = query.getAsStringList(param.key());
Object value = query.get(param.key());
List<String> samples;
if (value instanceof Trio) {
samples = Collections.singletonList(((Trio) value).getChild());
} else {
samples = query.getAsStringList(param.key());
}
Set<String> samplesAndParents = new LinkedHashSet<>(samples);
for (String sample : samples) {
Integer sampleId = metadataManager.getSampleId(defaultStudy.getId(), sample);
if (sampleId == null) {
throw VariantQueryException.sampleNotFound(sample, defaultStudy.getName());
}
SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(defaultStudy.getId(), sampleId);
if (TaskMetadata.Status.READY != sampleMetadata.getMendelianErrorStatus()) {
SampleMetadata sampleMetadata = metadataManager.getSampleMetadata(defaultStudy.getId(), sample);
if (requireMendelianReady
&& TaskMetadata.Status.READY != sampleMetadata.getMendelianErrorStatus()) {
throw VariantQueryException.malformedParam(param, "Sample \"" + sampleMetadata.getName()
+ "\" does not have the Mendelian Errors precomputed yet");
}
Expand All @@ -674,6 +683,21 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ
} else {
query.put(INCLUDE_SAMPLE.key(), new ArrayList<>(samplesAndParents));
}
if (param == SAMPLE_COMPOUND_HETEROZYGOUS) {
int studyId = defaultStudy.getId();
if (!(value instanceof Trio)) {
if (samples.size() > 1) {
throw VariantQueryException.malformedParam(SAMPLE, value.toString(),
"More than one sample provided for compound heterozygous filter.");
}
SampleMetadata sm = metadataManager.getSampleMetadata(studyId, samples.get(0));
Trio trio = new Trio(null,
metadataManager.getSampleName(studyId, sm.getFather()),
metadataManager.getSampleName(studyId, sm.getMother()),
sm.getName());
query.put(SAMPLE_COMPOUND_HETEROZYGOUS.key(), trio);
}
}
}

if (isValidParam(query, SCORE)) {
Expand Down Expand Up @@ -704,9 +728,7 @@ protected void preProcessStudyParams(Query query, QueryOptions options, VariantQ
|| isValidParam(query, SAMPLE_SKIP)
|| isValidParam(query, SAMPLE_LIMIT)
) {
if (projection == null) {
projection = projectionParser.parseVariantQueryProjection(query, options);
}
VariantQueryProjection projection = projectionParser.parseVariantQueryProjection(query, options);
// Apply the sample pagination.
// Remove the sampleLimit and sampleSkip to avoid applying the pagination twice
query.remove(SAMPLE_SKIP.key());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import org.opencb.commons.datastore.core.QueryParam;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator;
Expand All @@ -40,7 +41,8 @@ public BreakendVariantQueryExecutor(String storageEngineId, ObjectMap options,
}

/**
 * Decides whether this executor can run the given query.
 * <p>
 * Returns true only when the string value of the TYPE param equals the name of
 * {@code VariantType.BREAKEND} and the GENOTYPE param passes
 * {@code VariantQueryUtils.isValidParam}. The {@code options} argument is not read here.
 */
@Override
public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException {
public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException {
// The checks below run against the query held by the parsed variant query.
VariantQuery query = variantQuery.getQuery();
return query.getString(VariantQueryParam.TYPE.key()).equals(VariantType.BREAKEND.name())
&& VariantQueryUtils.isValidParam(query, VariantQueryParam.GENOTYPE);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,14 @@ public CompoundHeterozygousQueryExecutor(VariantStorageMetadataManager metadataM
}

/**
 * Decides whether this executor can run the given query.
 * <p>
 * Returns true when the SAMPLE_COMPOUND_HETEROZYGOUS param of the query passes {@code isValidParam}.
 * The {@code options} argument is not read here.
 */
@Override
public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException {
return isValidParam(query, VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS);
public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException {
return isValidParam(variantQuery.getQuery(), VariantQueryUtils.SAMPLE_COMPOUND_HETEROZYGOUS);
}

/**
 * Runs the compound heterozygous query for the trio found in the query.
 * <p>
 * Obtains the trio through {@code getCompHetTrio}, resolves the study name, and delegates to the
 * overload that takes the study plus the child, father and mother of the trio.
 */
@Override
protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) {
Trio trio = getCompHetTrio(variantQuery.getQuery());
String study = variantQuery.getStudyQuery().getStudyOrFail();
// The study name is taken from the default study of the study query.
String study = variantQuery.getStudyQuery().getDefaultStudyOrFail().getName();
return getOrIterator(study, trio.getChild(), trio.getFather(), trio.getMother(),
variantQuery, iterator);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ protected Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator
}

@Override
public boolean canUseThisExecutor(Query query, QueryOptions options) {
public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) {
for (QueryParam unsupportedParam : UNSUPPORTED_PARAMS) {
if (VariantQueryUtils.isValidParam(query, unsupportedParam)) {
if (VariantQueryUtils.isValidParam(variantQuery.getQuery(), unsupportedParam)) {
logger.warn("Unsupported variant query param {} in {}",
unsupportedParam.key(),
DBAdaptorVariantQueryExecutor.class.getSimpleName());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.opencb.opencga.storage.core.metadata.models.CohortMetadata;
import org.opencb.opencga.storage.core.metadata.models.StudyMetadata;
import org.opencb.opencga.storage.core.variant.adaptors.GenotypeClass;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryParam;
import org.opencb.opencga.storage.core.variant.adaptors.iterators.VariantDBIterator;
import org.opencb.opencga.storage.core.variant.query.*;
Expand All @@ -34,7 +35,8 @@ public NoOpVariantQueryExecutor(VariantStorageMetadataManager metadataManager, S
}

@Override
public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException {
public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException {
VariantQuery query = variantQuery.getQuery();
boolean sampleQuery = false;
String sample = null;
if (VariantQueryUtils.isValidParam(query, VariantQueryParam.GENOTYPE)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import org.opencb.biodata.models.variant.Variant;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.Query;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.metadata.VariantStorageMetadataManager;
Expand Down Expand Up @@ -65,12 +64,12 @@ public static void setDefaultTimeout(QueryOptions queryOptions, ObjectMap config

/**
* Determine if this VariantQueryExecutor can run the given query.
* @param query Query to execute
* @param variantQuery Query to execute
* @param options Options for the query
* @return True if this variant query executor is valid for the query
* @throws StorageEngineException if there is an error
*/
public abstract boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException;
public abstract boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException;

protected abstract Object getOrIterator(ParsedVariantQuery variantQuery, boolean iterator) throws StorageEngineException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.opencb.opencga.storage.core.exceptions.StorageEngineException;
import org.opencb.opencga.storage.core.exceptions.VariantSearchException;
import org.opencb.opencga.storage.core.variant.adaptors.VariantDBAdaptor;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQuery;
import org.opencb.opencga.storage.core.variant.adaptors.VariantQueryException;
import org.opencb.opencga.storage.core.variant.query.ParsedVariantQuery;
import org.opencb.opencga.storage.core.variant.search.solr.VariantSearchManager;
Expand All @@ -28,7 +29,8 @@ public SamplesSearchIndexVariantQueryExecutor(VariantDBAdaptor dbAdaptor, Varian
}

/**
 * Decides whether this executor can run the given query.
 * <p>
 * Asks {@code inferSpecificSearchIndexSamplesCollection} for a samples collection and returns true
 * only when one is found (non-null) and {@code searchActiveAndAlive} accepts it.
 */
@Override
public boolean canUseThisExecutor(Query query, QueryOptions options) throws StorageEngineException {
public boolean canUseThisExecutor(ParsedVariantQuery variantQuery, QueryOptions options) throws StorageEngineException {
// The inference below works on the query held by the parsed variant query.
VariantQuery query = variantQuery.getQuery();
String samplesCollection = inferSpecificSearchIndexSamplesCollection(query, options, getMetadataManager(), dbName);
return samplesCollection != null && searchActiveAndAlive(samplesCollection);
}
Expand Down
Loading

0 comments on commit b6e6e86

Please sign in to comment.