Skip to content

Commit

Permalink
Merge pull request #198 from andresfsilva/feature/check-genotype-freq…
Browse files Browse the repository at this point in the history
…uency

EVA-1199 Validate genotypes and frequencies are present

The flag `--require-evidence` can be passed to the vcf validator to indicate it should check the VCF has either Genotypes or Allele Frequencies
  • Loading branch information
jmmut authored Apr 8, 2020
2 parents 5b30298 + c4f529e commit 1d37bde
Show file tree
Hide file tree
Showing 24 changed files with 2,605 additions and 1,420 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Different types of validation reports can be written with the `-r` / `--report`

Each report is written into its own file and it is named after the input file, followed by a timestamp. The default output directory is the same as the input file's if provided using `-i`, or the current directory if using the standard input; it can be changed with the `-o` / `--outdir` option.

Pass the `--require-evidence` flag to additionally check that the VCF includes either Genotypes or Allele Frequencies. This check is **disabled** by default and only runs when the flag is given.

### Debugulator

There are some simple errors that can be automatically fixed. The most common error is the presence of duplicate variants. The needed parameters are the original VCF and the report generated by a previous run of the validator with the option `-r database`.
Expand Down
16 changes: 14 additions & 2 deletions inc/vcf/parsing_state.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,10 @@ namespace ebi
{
namespace vcf
{

/**
 * Switches for optional validations that are requested on top of the regular ones.
 *
 * An instance is handed to the ParsingState constructor and kept there as `additionalChecks`.
 *
 * NOTE: the member has no default initializer, so a default-constructed instance
 * (`AdditionalChecks checks;`) holds an indeterminate value until it is assigned.
 * Use `AdditionalChecks{false}` or assign the member explicitly before reading it.
 */
struct AdditionalChecks
{
    /**
     * Whether the evidence check is requested. Presumably set from the `--require-evidence`
     * command line flag (genotypes or allele frequencies must be present) -- confirm at the call site.
     */
    bool checkEvidence;
};

struct ParsingState
{
size_t n_lines;
Expand All @@ -46,8 +49,9 @@ namespace ebi
std::vector<std::unique_ptr<Error>> warnings;

std::multimap<std::string, std::string> defined_metadata;
AdditionalChecks additionalChecks;

ParsingState(std::shared_ptr<Source> source);
ParsingState(std::shared_ptr<Source> source, AdditionalChecks additionalChecks);
virtual ~ParsingState() = default;

void set_version(Version version);
Expand All @@ -66,6 +70,14 @@ namespace ebi
bool is_well_defined_meta(std::string const & meta_type, std::string const & id) const;

void add_well_defined_meta(std::string const & meta_type, std::string const & id);

void validate_additional_checks();

bool genotypes_present();

bool allele_frequencies_present();

bool allele_count_present();
};
}
}
Expand Down
2 changes: 1 addition & 1 deletion inc/vcf/string_constants.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ namespace ebi
{
namespace vcf
{

// VCF versions
const std::string VCF_V41 = "VCFv4.1";
const std::string VCF_V42 = "VCFv4.2";
Expand Down Expand Up @@ -63,6 +62,7 @@ namespace ebi
const char REPORT_OPTION[] = "report,r";
const char OUTDIR_OPTION[] = "outdir,o";
const char OUTPUT_OPTION[] = "output,o";
// Long-only option: unlike the names above it carries no ",<letter>" suffix, so there is no
// one-letter form. Per the README, passing --require-evidence asks the validator to check that
// the VCF includes either genotypes or allele frequencies; the check is off unless requested.
const char CHECK_EVIDENCE[] = "require-evidence";

// fields
const std::string ID = "ID";
Expand Down
21 changes: 11 additions & 10 deletions inc/vcf/validator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ namespace ebi
public ParsingState
{
public:
ParserImpl(std::shared_ptr<Source> source);
ParserImpl(std::shared_ptr<Source> source, AdditionalChecks additionalChecks);
virtual ~ParserImpl() = default;

void parse(std::string const & text) override;
Expand Down Expand Up @@ -130,7 +130,7 @@ namespace ebi
using ErrorPolicy = typename Configuration::ErrorPolicy;
using OptionalPolicy = typename Configuration::OptionalPolicy;

ParserImpl_v41(std::shared_ptr<Source> source);
ParserImpl_v41(std::shared_ptr<Source> source, AdditionalChecks additionalChecks);

private:
void parse_buffer(char const * p, char const * pe, char const * eof);
Expand All @@ -148,7 +148,7 @@ namespace ebi
using ErrorPolicy = typename Configuration::ErrorPolicy;
using OptionalPolicy = typename Configuration::OptionalPolicy;

ParserImpl_v42(std::shared_ptr<Source> source);
ParserImpl_v42(std::shared_ptr<Source> source, AdditionalChecks additionalChecks);

private:
void parse_buffer(char const * p, char const * pe, char const * eof);
Expand All @@ -166,7 +166,7 @@ namespace ebi
using ErrorPolicy = typename Configuration::ErrorPolicy;
using OptionalPolicy = typename Configuration::OptionalPolicy;

ParserImpl_v43(std::shared_ptr<Source> source);
ParserImpl_v43(std::shared_ptr<Source> source, AdditionalChecks additionalChecks);

private:
void parse_buffer(char const * p, char const * pe, char const * eof);
Expand All @@ -185,15 +185,16 @@ namespace ebi
using FullValidator_v43 = ParserImpl_v43<FullValidatorCfg>;
using Reader_v43 = ParserImpl_v43<ReaderCfg>;

bool is_valid_vcf_file(std::istream &input,
const std::string &sourceName,
bool is_valid_vcf_file(std::istream &input, const std::string &sourceName,
ValidationLevel validationLevel,
std::vector<std::unique_ptr<ebi::vcf::ReportWriter>> &outputs);
std::vector<std::unique_ptr<ebi::vcf::ReportWriter>> &outputs,
AdditionalChecks checks);

bool process_vcf_stream(std::istream &input,
const std::string &sourceName,
ValidationLevel validationLevel,
std::vector<std::unique_ptr<ebi::vcf::ReportWriter>> &outputs);
const std::string &sourceName,
ValidationLevel validationLevel,
std::vector<std::unique_ptr<ebi::vcf::ReportWriter>> &outputs,
AdditionalChecks additionalChecks);

std::string get_compression_from_extension(std::string const & source);

Expand Down
Loading

0 comments on commit 1d37bde

Please sign in to comment.