Skip to content

Commit

Permalink
add counts after each step
Browse files (browse the repository at this point in the history)
  • Loading branch information
apriltuesday committed Jul 12, 2023
1 parent 29fbd00 commit aa24d59
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 9 deletions.
10 changes: 7 additions & 3 deletions opentargets_pharmgkb/counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ def __init__(self):
# Input counts (before annotation and exploding by drug, etc.)
self.clinical_annotations = 0
self.with_rs = 0
# Counts after exploding by allele
self.allele_annotations = 0
# Counts after exploding by each attribute
self.exploded_alleles = 0
self.exploded_drugs = 0
self.exploded_phenotypes = 0
# Output counts (after annotation and exploding)
self.evidence_strings = 0
self.with_chebi = 0
Expand All @@ -22,7 +24,9 @@ def __init__(self):
def report(self):
report_str = f'\nTotal clinical annotations: {self.clinical_annotations}\n'
report_str += f'\tWith RS: {self.with_rs}\n'
report_str += f'Total annotations by allele: {self.allele_annotations}\n'
report_str += f'Exploded by allele: {self.exploded_alleles}\n'
report_str += f'Exploded by drug: {self.exploded_drugs}\n'
report_str += f'Exploded by phenotype: {self.exploded_phenotypes}\n'
report_str += f'Total evidence strings: {self.evidence_strings}\n'
report_str += f'\tWith CHEBI: {self.with_chebi}\n'
report_str += f'\tWith EFO phenotype: {self.with_efo}\n'
Expand Down
16 changes: 10 additions & 6 deletions opentargets_pharmgkb/evidence_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,20 @@ def pipeline(data_dir, created_date, output_path):

# Main processing
merged_with_alleles_table = pd.merge(rs_only_table, clinical_alleles_table, on=ID_COL_NAME, how='left')
counts.allele_annotations = len(merged_with_alleles_table)
coordinates_table = get_vcf_coordinates(merged_with_alleles_table)
consequences_table = get_functional_consequences(coordinates_table)
# mapped_genes = explode_and_map_genes(consequences_table)
mapped_drugs = explode_and_map_drugs(consequences_table, drugs_table)
counts.exploded_alleles = len(merged_with_alleles_table)

mapped_drugs = explode_and_map_drugs(merged_with_alleles_table, drugs_table)
counts.exploded_drugs = len(mapped_drugs)

mapped_phenotypes = explode_and_map_phenotypes(mapped_drugs)
counts.exploded_phenotypes = len(mapped_phenotypes)

coordinates_table = get_vcf_coordinates(mapped_phenotypes)
consequences_table = get_functional_consequences(coordinates_table)

# Add clinical evidence with PMIDs
pmid_evidence = clinical_evidence_table[clinical_evidence_table['PMID'].notna()]
evidence_table = pd.merge(mapped_phenotypes, pmid_evidence.groupby(by=ID_COL_NAME).aggregate(
evidence_table = pd.merge(consequences_table, pmid_evidence.groupby(by=ID_COL_NAME).aggregate(
publications=('PMID', list)), on=ID_COL_NAME)

# Gather output counts
Expand Down

0 comments on commit aa24d59

Please sign in to comment.