diff --git a/data-exploration/direction-of-effect.ipynb b/data-exploration/direction-of-effect.ipynb new file mode 100644 index 0000000..70525d8 --- /dev/null +++ b/data-exploration/direction-of-effect.ipynb @@ -0,0 +1,4181 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "9d2ed9ab-c78b-4d67-8b9b-18d22e44444b", + "metadata": {}, + "source": [ + "# Variant annotation tables & direction of effect investigation\n", + "\n", + "## Table of contents\n", + "\n", + "1. [Initial data exploration](#Initial-data-exploration)\n", + " 1. [Example clinical annotation](#Example-clinical-annotation)\n", + "2. [Coverage](#Coverage)\n", + "3. [Direction of effect](#Direction-of-effect)\n", + " 1. [Sentence breakdown](#Sentence-breakdown)\n", + " 2. [Vocabulary](#Vocabulary)\n", + "4. [Alleles and genotypes](#Alleles-and-genotypes)\n", + "5. [Bonus material](#Bonus-material)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "c6f9476d-828f-4ff7-82df-e2830b3e2767", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import os\n", + "import csv\n", + "import re\n", + "from collections import Counter\n", + "\n", + "import pandas as pd\n", + "\n", + "from opentargets_pharmgkb.evidence_generation import ID_COL_NAME\n", + "from opentargets_pharmgkb.pandas_utils import read_tsv_to_df" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e6a46840-39c0-4518-8a05-9258568cda20", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "pd.set_option('display.max_columns', None)\n", + "pd.set_option('display.max_rows', 100)\n", + "pd.set_option('display.max_colwidth', None)" + ] + }, + { + "cell_type": "markdown", + "id": "bf50f6c5-0b82-41f7-932e-1b78c2dc22df", + "metadata": {}, + "source": [ + "## Initial data exploration\n", + "\n", + "[Top of page](#Table-of-contents)\n", + "\n", + "The variant annotations zip file contains 4 new tables, described in the readme as follows:\n", 
+ ">* **var_pheno_ann.tsv**: Contains associations in which the variant affects a phenotype, with or without drug information.\n", + ">* **var_drug_ann.tsv**: Contains associations in which the variant affects a drug dose, response, metabolism, etc.\n", + ">* **var_fa_ann.tsv**: Contains in vitro and functional analysis-type associations.\n", + ">* **study_parameters.tsv**: Contains information about the study population size, biogeographical group and statistics for the variant annotations; this file is cross-referenced against the 3 variant annotation files.\n", + "\n", + "Study parameters table is interesting but feels out of scope for now at least, will ignore for the rest of the notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "27861f6f-9343-4c44-be4f-4bf70e39eee4", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "data_dir = '/home/april/projects/opentargets/pharmgkb/doe'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "6c87bbc9-4c87-40a0-84d5-de9249f50192", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Download new data (2024-05-05)\n", + "# !cd {data_dir}\n", + "\n", + "# !wget -q https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip\n", + "# !wget -q https://api.pharmgkb.org/v1/download/file/data/variantAnnotations.zip\n", + "\n", + "# !unzip -jq clinicalAnnotations.zip \"*.tsv\" -d {data_dir}\n", + "# !unzip -jq variantAnnotations.zip \"*.tsv\" -d {data_dir}\n", + "\n", + "# !rm clinicalAnnotations.zip variantAnnotations.zip" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "4d823e4f-3e41-4bc6-a1d3-003c6bd14898", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "var_drug_ann = read_tsv_to_df(os.path.join(data_dir, 'var_drug_ann.tsv'))\n", + "var_fa_ann = read_tsv_to_df(os.path.join(data_dir, 'var_fa_ann.tsv'))\n", + "var_pheno_ann = 
read_tsv_to_df(os.path.join(data_dir, 'var_pheno_ann.tsv'))" + ] + }, + { + "cell_type": "markdown", + "id": "f7c7bbdb-9a0f-45ba-ac34-4070b8f0dbdb", + "metadata": {}, + "source": [ + "Questions to consider:\n", + "* How many annotations?\n", + "* What's the coverage of variant/haplotypes relative to clinical annotations?\n", + "* What are the relevant fields?\n", + "* What's the relationship between these annotations and clinical annotations?\n", + "* Which of these columns has a controlled vocab vs. free text?\n", + "* How do the different variant-level annotation sentences contribute to the overall clinical annotation sentences?" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "0852d631-f8ff-4fa8-afe0-6b2599b0b9c7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11901" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(var_drug_ann)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "64ac3c98-d1cf-48c1-8805-ecdc4530d649", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2009" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(var_fa_ann)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "b826596b-12e4-4095-b9f5-f07796f0e7d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13517" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(var_pheno_ann)" + ] + }, + { + "cell_type": "code", + "execution_count": 245, + "id": "d22115ea-56f6-404d-9839-c91c8ed74a89", + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Variant Annotation IDVariant/HaplotypesGeneDrug(s)PMIDPhenotype CategorySignificanceNotesSentenceAllelesSpecialty PopulationMetabolizer typesisPluralIs/Is Not associatedDirection of effectPD/PK termsMultiple drugs And/orPopulation typesPopulation Phenotypes or diseasesMultiple phenotypes or diseases And/orComparison Allele(s) or Genotype(s)Comparison Metabolizer types
01451834452CYP3A4*1, CYP3A4*17CYP3A4nifedipine15634941Other, Metabolism/PKnot statedin vitro expression of the recombinant CYP3A4*17 allelic protein and the wild-type proteinCYP3A4 *17 is associated with decreased metabolism of nifedipine as compared to CYP3A4 *1.*17NaNNaNIsAssociated withdecreasedmetabolism ofNaNNaNNaNNaN*1NaN
11451159680rs5031016CYP2A6warfarin22248286DosagenoNo association was found between this variant and warfarin-maintenance dose. Described as CYP2A6*7 in this study.Allele G is not associated with increased dose of warfarin in people with an international normalized ratio (INR) of 2.0-3.0 as compared to allele A.GNaNNaNIsNot associated withincreaseddose ofNaNin people withOther:an international normalized ratio (INR) of 2.0-3.0NaNANaN
21451306860CYP2C9*11CYP2C9warfarin33350885Dosagenot stated\"This case suggests that CYP2C9 *11/*11 carriers require approximately two thirds less warfarin than CYP2C9\" normal function homozygotes.CYP2C9 *11/*11 is associated with decreased dose of warfarin.*11/*11NaNNaNIsAssociated withdecreaseddose ofNaNNaNNaNNaNNaNNaN
31448997750CYP2B6*1, CYP2B6*18CYP2B6efavirenz16495778Metabolism/PKyesPlease note that in the paper the allele was referred to as CYP2B6*16. CYP2B6*16 and *18 alleles have been consolidated by PharmVar in Jan 2020, with *16 now listed as a suballele of *18 (CYP2B6*18.002). This annotation is updated to be on CYP2B6*18, instead of CYP2B6*16.CYP2B6 *1/*18 is associated with increased concentrations of efavirenz in people with HIV Infections as compared to CYP2B6 *1/*1.*1/*18NaNNaNIsAssociated withincreasedconcentrations ofNaNin people withDisease:HIV InfectionsNaN*1/*1NaN
41448631821CYP2C19*1, CYP2C19*2CYP2C19clomipramine, desmethyl clomipramine28470111Metabolism/PKnoin a single individualCYP2C19 *1/*2 is associated with increased trough concentration of clomipramine and desmethyl clomipramine.*1/*2NaNNaNIsAssociated withincreasedtrough concentration ofandNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " Variant Annotation ID Variant/Haplotypes Gene \\\n", + "0 1451834452 CYP3A4*1, CYP3A4*17 CYP3A4 \n", + "1 1451159680 rs5031016 CYP2A6 \n", + "2 1451306860 CYP2C9*11 CYP2C9 \n", + "3 1448997750 CYP2B6*1, CYP2B6*18 CYP2B6 \n", + "4 1448631821 CYP2C19*1, CYP2C19*2 CYP2C19 \n", + "\n", + " Drug(s) PMID Phenotype Category \\\n", + "0 nifedipine 15634941 Other, Metabolism/PK \n", + "1 warfarin 22248286 Dosage \n", + "2 warfarin 33350885 Dosage \n", + "3 efavirenz 16495778 Metabolism/PK \n", + "4 clomipramine, desmethyl clomipramine 28470111 Metabolism/PK \n", + "\n", + " Significance \\\n", + "0 not stated \n", + "1 no \n", + "2 not stated \n", + "3 yes \n", + "4 no \n", + "\n", + " Notes \\\n", + "0 in vitro expression of the recombinant CYP3A4*17 allelic protein and the wild-type protein \n", + "1 No association was found between this variant and warfarin-maintenance dose. Described as CYP2A6*7 in this study. \n", + "2 \"This case suggests that CYP2C9 *11/*11 carriers require approximately two thirds less warfarin than CYP2C9\" normal function homozygotes. \n", + "3 Please note that in the paper the allele was referred to as CYP2B6*16. CYP2B6*16 and *18 alleles have been consolidated by PharmVar in Jan 2020, with *16 now listed as a suballele of *18 (CYP2B6*18.002). This annotation is updated to be on CYP2B6*18, instead of CYP2B6*16. \n", + "4 in a single individual \n", + "\n", + " Sentence \\\n", + "0 CYP3A4 *17 is associated with decreased metabolism of nifedipine as compared to CYP3A4 *1. \n", + "1 Allele G is not associated with increased dose of warfarin in people with an international normalized ratio (INR) of 2.0-3.0 as compared to allele A. \n", + "2 CYP2C9 *11/*11 is associated with decreased dose of warfarin. \n", + "3 CYP2B6 *1/*18 is associated with increased concentrations of efavirenz in people with HIV Infections as compared to CYP2B6 *1/*1. 
\n", + "4 CYP2C19 *1/*2 is associated with increased trough concentration of clomipramine and desmethyl clomipramine. \n", + "\n", + " Alleles Specialty Population Metabolizer types isPlural \\\n", + "0 *17 NaN NaN Is \n", + "1 G NaN NaN Is \n", + "2 *11/*11 NaN NaN Is \n", + "3 *1/*18 NaN NaN Is \n", + "4 *1/*2 NaN NaN Is \n", + "\n", + " Is/Is Not associated Direction of effect PD/PK terms \\\n", + "0 Associated with decreased metabolism of \n", + "1 Not associated with increased dose of \n", + "2 Associated with decreased dose of \n", + "3 Associated with increased concentrations of \n", + "4 Associated with increased trough concentration of \n", + "\n", + " Multiple drugs And/or Population types \\\n", + "0 NaN NaN \n", + "1 NaN in people with \n", + "2 NaN NaN \n", + "3 NaN in people with \n", + "4 and NaN \n", + "\n", + " Population Phenotypes or diseases \\\n", + "0 NaN \n", + "1 Other:an international normalized ratio (INR) of 2.0-3.0 \n", + "2 NaN \n", + "3 Disease:HIV Infections \n", + "4 NaN \n", + "\n", + " Multiple phenotypes or diseases And/or Comparison Allele(s) or Genotype(s) \\\n", + "0 NaN *1 \n", + "1 NaN A \n", + "2 NaN NaN \n", + "3 NaN *1/*1 \n", + "4 NaN NaN \n", + "\n", + " Comparison Metabolizer types \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN " + ] + }, + "execution_count": 245, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Looking at the data - output suppressed for brevity\n", + "var_drug_ann.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 246, + "id": "12dada29-2a14-4992-acf4-45115aed57d2", + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Variant Annotation IDVariant/HaplotypesGeneDrug(s)PMIDPhenotype CategorySignificanceNotesSentenceAllelesSpecialty PopulationAssay typeMetabolizer typesisPluralIs/Is Not associatedDirection of effectFunctional termsGene/gene productWhen treated with/exposed to/when assayed withMultiple drugs And/orCell typeComparison Allele(s) or Genotype(s)Comparison Metabolizer types
01451148445CYP2C19*1, CYP2C19*17CYP2C19normeperidine30902024NaNnot statedIn other in vitro experiments, normeperidine formation was significantly correlated with CYP2C19 activity, as measured by S-mephenytoin 4-hydroxylation.CYP2C19 *17/*17 is associated with increased formation of normeperidine as compared to CYP2C19 *1/*1 + *1/*17.*17/*17NaNin human liver microsomesNaNIsAssociated withincreasedformation ofNaNNaNNaNNaN*1/*1 + *1/*17NaN
11447814273rs9923231VKORC1NaN26847243OthernoNaNAllele T is not associated with transcription of VKORC1 in HepG2 cells as compared to allele C.TNaNluciferase assayNaNIsNot associated withNaNtranscription ofVKORC1NaNNaNin HepG2 cellsCNaN
21447814277rs56314408VKORC1NaN26847243OtheryesIn the European population, this SNPs is in high LD with rs9923231 but not other populations. This SNP disrupts a binding motif for transcription factor TFAP2A/C.Allele C is associated with increased transcription of VKORC1 in HepG2 cells as compared to allele T.CNaNluciferase assayNaNIsAssociated withincreasedtranscription ofVKORC1NaNNaNin HepG2 cellsTNaN
31447990384rs1065852CYP2D6bufuralol2211621Metabolism/PKnot statedIn vitro experiments showed a significant decrease in CYP2D6 activity for the variant construct expressed in COS-1 cells as compared to wild-type.Allele A is associated with decreased activity of CYP2D6 when assayed with bufuralol in COS-1 cells as compared to allele G.ANaNNaNNaNIsAssociated withdecreasedactivity ofCYP2D6when assayed withNaNin COS-1 cellsGNaN
41448281185CYP2B6*1, CYP2B6*6CYP2B6bupropion27439448EfficacyyesThe ratio of hydroxybupropion versus bupropion (AUC_hyd/ AUC_bup) in terms of area under the time-concentration curve (AUC) was used to assay CYP2B6 activity.CYP2B6 *1/*1 is associated with increased activity of CYP2B6 when assayed with bupropion as compared to CYP2B6 *1/*6.*1/*1NaNNaNNaNIsAssociated withincreasedactivity ofCYP2B6when assayed withNaNNaN*1/*6NaN
\n", + "
" + ], + "text/plain": [ + " Variant Annotation ID Variant/Haplotypes Gene Drug(s) \\\n", + "0 1451148445 CYP2C19*1, CYP2C19*17 CYP2C19 normeperidine \n", + "1 1447814273 rs9923231 VKORC1 NaN \n", + "2 1447814277 rs56314408 VKORC1 NaN \n", + "3 1447990384 rs1065852 CYP2D6 bufuralol \n", + "4 1448281185 CYP2B6*1, CYP2B6*6 CYP2B6 bupropion \n", + "\n", + " PMID Phenotype Category Significance \\\n", + "0 30902024 NaN not stated \n", + "1 26847243 Other no \n", + "2 26847243 Other yes \n", + "3 2211621 Metabolism/PK not stated \n", + "4 27439448 Efficacy yes \n", + "\n", + " Notes \\\n", + "0 In other in vitro experiments, normeperidine formation was significantly correlated with CYP2C19 activity, as measured by S-mephenytoin 4-hydroxylation. \n", + "1 NaN \n", + "2 In the European population, this SNPs is in high LD with rs9923231 but not other populations. This SNP disrupts a binding motif for transcription factor TFAP2A/C. \n", + "3 In vitro experiments showed a significant decrease in CYP2D6 activity for the variant construct expressed in COS-1 cells as compared to wild-type. \n", + "4 The ratio of hydroxybupropion versus bupropion (AUC_hyd/ AUC_bup) in terms of area under the time-concentration curve (AUC) was used to assay CYP2B6 activity. \n", + "\n", + " Sentence \\\n", + "0 CYP2C19 *17/*17 is associated with increased formation of normeperidine as compared to CYP2C19 *1/*1 + *1/*17. \n", + "1 Allele T is not associated with transcription of VKORC1 in HepG2 cells as compared to allele C. \n", + "2 Allele C is associated with increased transcription of VKORC1 in HepG2 cells as compared to allele T. \n", + "3 Allele A is associated with decreased activity of CYP2D6 when assayed with bufuralol in COS-1 cells as compared to allele G. \n", + "4 CYP2B6 *1/*1 is associated with increased activity of CYP2B6 when assayed with bupropion as compared to CYP2B6 *1/*6. 
\n", + "\n", + " Alleles Specialty Population Assay type Metabolizer types \\\n", + "0 *17/*17 NaN in human liver microsomes NaN \n", + "1 T NaN luciferase assay NaN \n", + "2 C NaN luciferase assay NaN \n", + "3 A NaN NaN NaN \n", + "4 *1/*1 NaN NaN NaN \n", + "\n", + " isPlural Is/Is Not associated Direction of effect Functional terms \\\n", + "0 Is Associated with increased formation of \n", + "1 Is Not associated with NaN transcription of \n", + "2 Is Associated with increased transcription of \n", + "3 Is Associated with decreased activity of \n", + "4 Is Associated with increased activity of \n", + "\n", + " Gene/gene product When treated with/exposed to/when assayed with \\\n", + "0 NaN NaN \n", + "1 VKORC1 NaN \n", + "2 VKORC1 NaN \n", + "3 CYP2D6 when assayed with \n", + "4 CYP2B6 when assayed with \n", + "\n", + " Multiple drugs And/or Cell type Comparison Allele(s) or Genotype(s) \\\n", + "0 NaN NaN *1/*1 + *1/*17 \n", + "1 NaN in HepG2 cells C \n", + "2 NaN in HepG2 cells T \n", + "3 NaN in COS-1 cells G \n", + "4 NaN NaN *1/*6 \n", + "\n", + " Comparison Metabolizer types \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN " + ] + }, + "execution_count": 246, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "var_fa_ann.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "id": "70537798-e6fb-47f5-8ba2-6859c4b645a2", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Variant Annotation IDVariant/HaplotypesGeneDrug(s)PMIDPhenotype CategorySignificanceNotesSentenceAllelesSpecialty PopulationMetabolizer typesisPluralIs/Is Not associatedDirection of effectSide effect/efficacy/otherPhenotypeMultiple phenotypes And/orWhen treated with/exposed to/when assayed withMultiple drugs And/orPopulation typesPopulation Phenotypes or diseasesMultiple phenotypes or diseases And/orComparison Allele(s) or Genotype(s)Comparison Metabolizer types
01449169911HLA-B*35:08HLA-Blamotrigine29238301ToxicitynoThe allele was not significant when comparing allele frequency in cases of severe cutaneous adverse reactions (SCAR), Stevens-Johnson Syndrome (SJS) and Maculopapular Exanthema (MPE) (1/15) and controls (individuals without AEs who took lamotrigine) (0/50). The allele was significant when comparing between cases (1/15) and the general population (1/986).HLA-B *35:08 is not associated with likelihood of Maculopapular Exanthema, severe cutaneous adverse reactions or Stevens-Johnson Syndrome when treated with lamotrigine in people with Epilepsy.*35:08NaNNaNIsNot associated withNaNlikelihood ofSide Effect:Maculopapular Exanthema, Side Effect:severe cutaneous adverse reactions, Side Effect:Stevens-Johnson Syndromeorwhen treated withNaNin people withDisease:EpilepsyNaNNaNNaN
1982022165rs45607939NAT2sulfamethoxazole / trimethoprim22850190ToxicitynoMinor allele frequencies were compared between cases (with drug-induced hypersensitivity) and controls.Allele T is not associated with increased risk of Hypersensitivity when treated with sulfamethoxazole / trimethoprim in people with Infection.TNaNNaNIsNot associated withincreasedrisk ofDisease:HypersensitivityNaNwhen treated withNaNin people withDisease:InfectionNaNNaNNaN
2982022148rs1799930NAT2sulfamethoxazole / trimethoprim22850190ToxicitynoMinor allele frequencies were compared between cases (with drug-induced hypersensitivity) and controls.Allele A is not associated with increased risk of Hypersensitivity when treated with sulfamethoxazole / trimethoprim in people with Infection.ANaNNaNIsNot associated withincreasedrisk ofDisease:HypersensitivityNaNwhen treated withNaNin people withDisease:InfectionNaNNaNNaN
31451283480rs16969968CHRNA5NaN22071378Otheryesthis was from meta-analysis of 27 studies but the number of total cases and the risk allele not clearly specified. Minor allele frequency was given for A allele. Introduction states that variant is Asp398Asn, where Asn (A allele) has lower nicotine response than Asp (G allele) and may be at greater risk for nicotine addiction.Allele A is associated with increased severity of Tobacco Use Disorder in people with Tobacco Use Disorder.ANaNNaNIsAssociated withincreasedseverity ofOther:Tobacco Use DisorderNaNNaNNaNin people withOther:Tobacco Use DisorderNaNNaNNaN
41444696916rs267606617MT-RNR1streptomycin7689389Toxicitynot statedPedigree analysis with 3 separate families. Within the maternal lines, 15 individuals had the 1555G variant, took aminoglycoside antibiotics, and developed hearing loss. 100% of individuals with the 1555G variant who took aminoglycosides developed hearing loss. Homoplasmic. Please note that no statistical analyses were done.Allele G is associated with Ototoxicity when treated with streptomycin as compared to allele A.GNaNNaNIsAssociated withNaNNaNSide Effect:Ototoxicityandwhen treated withNaNNaNNaNNaNANaN
\n", + "
" + ], + "text/plain": [ + " Variant Annotation ID Variant/Haplotypes Gene \\\n", + "0 1449169911 HLA-B*35:08 HLA-B \n", + "1 982022165 rs45607939 NAT2 \n", + "2 982022148 rs1799930 NAT2 \n", + "3 1451283480 rs16969968 CHRNA5 \n", + "4 1444696916 rs267606617 MT-RNR1 \n", + "\n", + " Drug(s) PMID Phenotype Category Significance \\\n", + "0 lamotrigine 29238301 Toxicity no \n", + "1 sulfamethoxazole / trimethoprim 22850190 Toxicity no \n", + "2 sulfamethoxazole / trimethoprim 22850190 Toxicity no \n", + "3 NaN 22071378 Other yes \n", + "4 streptomycin 7689389 Toxicity not stated \n", + "\n", + " Notes \\\n", + "0 The allele was not significant when comparing allele frequency in cases of severe cutaneous adverse reactions (SCAR), Stevens-Johnson Syndrome (SJS) and Maculopapular Exanthema (MPE) (1/15) and controls (individuals without AEs who took lamotrigine) (0/50). The allele was significant when comparing between cases (1/15) and the general population (1/986). \n", + "1 Minor allele frequencies were compared between cases (with drug-induced hypersensitivity) and controls. \n", + "2 Minor allele frequencies were compared between cases (with drug-induced hypersensitivity) and controls. \n", + "3 this was from meta-analysis of 27 studies but the number of total cases and the risk allele not clearly specified. Minor allele frequency was given for A allele. Introduction states that variant is Asp398Asn, where Asn (A allele) has lower nicotine response than Asp (G allele) and may be at greater risk for nicotine addiction. \n", + "4 Pedigree analysis with 3 separate families. Within the maternal lines, 15 individuals had the 1555G variant, took aminoglycoside antibiotics, and developed hearing loss. 100% of individuals with the 1555G variant who took aminoglycosides developed hearing loss. Homoplasmic. Please note that no statistical analyses were done. 
\n", + "\n", + " Sentence \\\n", + "0 HLA-B *35:08 is not associated with likelihood of Maculopapular Exanthema, severe cutaneous adverse reactions or Stevens-Johnson Syndrome when treated with lamotrigine in people with Epilepsy. \n", + "1 Allele T is not associated with increased risk of Hypersensitivity when treated with sulfamethoxazole / trimethoprim in people with Infection. \n", + "2 Allele A is not associated with increased risk of Hypersensitivity when treated with sulfamethoxazole / trimethoprim in people with Infection. \n", + "3 Allele A is associated with increased severity of Tobacco Use Disorder in people with Tobacco Use Disorder. \n", + "4 Allele G is associated with Ototoxicity when treated with streptomycin as compared to allele A. \n", + "\n", + " Alleles Specialty Population Metabolizer types isPlural \\\n", + "0 *35:08 NaN NaN Is \n", + "1 T NaN NaN Is \n", + "2 A NaN NaN Is \n", + "3 A NaN NaN Is \n", + "4 G NaN NaN Is \n", + "\n", + " Is/Is Not associated Direction of effect Side effect/efficacy/other \\\n", + "0 Not associated with NaN likelihood of \n", + "1 Not associated with increased risk of \n", + "2 Not associated with increased risk of \n", + "3 Associated with increased severity of \n", + "4 Associated with NaN NaN \n", + "\n", + " Phenotype \\\n", + "0 Side Effect:Maculopapular Exanthema, Side Effect:severe cutaneous adverse reactions, Side Effect:Stevens-Johnson Syndrome \n", + "1 Disease:Hypersensitivity \n", + "2 Disease:Hypersensitivity \n", + "3 Other:Tobacco Use Disorder \n", + "4 Side Effect:Ototoxicity \n", + "\n", + " Multiple phenotypes And/or When treated with/exposed to/when assayed with \\\n", + "0 or when treated with \n", + "1 NaN when treated with \n", + "2 NaN when treated with \n", + "3 NaN NaN \n", + "4 and when treated with \n", + "\n", + " Multiple drugs And/or Population types Population Phenotypes or diseases \\\n", + "0 NaN in people with Disease:Epilepsy \n", + "1 NaN in people with Disease:Infection \n", 
+ "2 NaN in people with Disease:Infection \n", + "3 NaN in people with Other:Tobacco Use Disorder \n", + "4 NaN NaN NaN \n", + "\n", + " Multiple phenotypes or diseases And/or Comparison Allele(s) or Genotype(s) \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN A \n", + "\n", + " Comparison Metabolizer types \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN " + ] + }, + "execution_count": 247, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "var_pheno_ann.head()" + ] + }, + { + "cell_type": "markdown", + "id": "3b84a02e-1791-4a6b-8223-d904442ddf7c", + "metadata": {}, + "source": [ + "The 3 annotation tables provide evidence for the clinical annotations and can be connected to them by joining with the `clinical_ann_evidence.tsv` file. In general a clinical annotation can have multiple variant annotations as evidence, and a variant annotation can be used as evidence for multiple clinical annotations (in theory; I've not actually observed this).\n", + "\n", + "Each of these tables has a \"Direction of effect\" column, and the type of \"effect\" is different for each - likelihood of side effects, formation of product, metabolism of drug, etc.\n", + "\n", + "**Question for OT**: when we say \"direction of effect\", do we mean any of these \"effects\"? I.e. should we include all three of these tables or focus on one?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "d43fd147-43b4-4541-9595-f757da937e60", + "metadata": {}, + "outputs": [], + "source": [ + "clinical_annotations = read_tsv_to_df(os.path.join(data_dir, 'clinical_annotations.tsv'))\n", + "clinical_ann_evidence = read_tsv_to_df(os.path.join(data_dir, 'clinical_ann_evidence.tsv'))\n", + "clinical_ann_alleles = read_tsv_to_df(os.path.join(data_dir, 'clinical_ann_alleles.tsv'))" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "id": "1a553aac-a9f7-4579-ab7c-05fa6f90ddfe", + "metadata": {}, + "outputs": [], + "source": [ + "main_df = pd.merge(clinical_annotations, clinical_ann_evidence, how='left', on=ID_COL_NAME)\n", + "main_df = main_df[[\n", + " # Main table\n", + " 'Clinical Annotation ID', 'Variant/Haplotypes', 'Gene', 'Level of Evidence', 'Phenotype Category', 'Drug(s)', 'Phenotype(s)',\n", + " # Evidence table\n", + " 'Evidence ID', 'Evidence Type', 'PMID', 'Summary',\n", + "]]" + ] + }, + { + "cell_type": "markdown", + "id": "5945f09b-9516-4d65-ad75-d27a5a1890cf", + "metadata": {}, + "source": [ + "#### Example clinical annotation\n", + "\n", + "[Top of page](#Table-of-contents)\n", + "\n", + "Looking at [981755803](https://www.pharmgkb.org/clinicalAnnotation/981755803), which has all three types of variant annotation evidence as well as label/guideline evidence." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "f5bb8c9e-baeb-4ef6-9373-7c38a333601c", + "metadata": {}, + "outputs": [], + "source": [ + "df_981755803 = main_df[main_df[ID_COL_NAME] == '981755803']\n", + "\n", + "df_981755803_drug = pd.merge(df_981755803, var_drug_ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='inner', suffixes=(None, '_var_drug'))\n", + "df_981755803_pheno = pd.merge(df_981755803, var_pheno_ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='inner', suffixes=(None, '_var_pheno'))\n", + "df_981755803_fa = pd.merge(df_981755803, var_fa_ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='inner', suffixes=(None, '_var_fa'))" + ] + }, + { + "cell_type": "code", + "execution_count": 229, + "id": "811adf62-6f79-4451-9a3e-2772057e4a01", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of evidence 30\n", + "Number of var/drug evidence 24\n", + "Number of var/fa evidence 2\n", + "Number of var/pheno evidence 2\n" + ] + } + ], + "source": [ + "print('Number of evidence', len(df_981755803))\n", + "print('Number of var/drug evidence', len(df_981755803_drug))\n", + "print('Number of var/fa evidence', len(df_981755803_fa))\n", + "print('Number of var/pheno evidence', len(df_981755803_pheno))" + ] + }, + { + "cell_type": "code", + "execution_count": 248, + "id": "c4a15bc6-90fc-4d4b-bd0e-621b8ffd6093", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Clinical Annotation IDVariant/HaplotypesGeneLevel of EvidencePhenotype CategoryDrug(s)Phenotype(s)Evidence IDEvidence TypePMIDSummaryVariant Annotation IDVariant/Haplotypes_var_drugGene_var_drugDrug(s)_var_drugPMID_var_drugPhenotype Category_var_drugSignificanceNotesSentenceAllelesSpecialty PopulationMetabolizer typesisPluralIs/Is Not associatedDirection of effectPD/PK termsMultiple drugs And/orPopulation typesPopulation Phenotypes or diseasesMultiple phenotypes or diseases And/orComparison Allele(s) or Genotype(s)Comparison Metabolizer types
0981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis981755665Variant Drug Annotation21083385Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis.981755665rs75527207CFTRivacaftor21083385Efficacynot statedClinical trials were carried out to test efficacy of ivacaftor selecting only patients with the CFTR G551D mutation on at least one allele (genotype AA or AG).Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis.AA + AGNaNNaNAreAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
1981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis981755678Variant Drug Annotation22047557Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis.981755678rs75527207CFTRivacaftor22047557Efficacynot statedA clinical trial that selected patients with the G551D CFTR mutation (rs75527207 genotype AA or AG). Patients without this mutation were excluded. One patient included in the placebo group was homozygous for F508del (rs113993960 genotype del/del).Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis.AA + AGNaNNaNAreAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
2981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis982009991Variant Drug Annotation23590265Allele A is associated with response to ivacaftor in children with Cystic Fibrosis.982009991rs75527207CFTRivacaftor23590265EfficacyyesPatients aged 6-11 at time of screening who had at least one allele with the G551D mutation (allele A at position rs75527207) were recruited for this trial. Ivacaftor is only indicated in CF patients with this mutation. Significant improvements in lung function were seen in the ivacaftor treatment group compared to placebo.Allele A is associated with response to ivacaftor in children with Cystic Fibrosis.APediatricNaNIsAssociated withNaNresponse toNaNin children withDisease:Cystic FibrosisNaNNaNNaN
3981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1183629335Variant Drug Annotation24066763Genotype AA is associated with response to ivacaftor in women with Cystic Fibrosis.1183629335rs75527207CFTRivacaftor24066763Efficacynot statedCase report of a female homozygous for the G551D CFTR mutation (genotype AA) in which ivacaftor was efficacious: increased absolute change in percent of predicted FEV1, increased weight and walk distance and decreased sweat chloride levels over a 12 month course with no sign of plateau to date.Genotype AA is associated with response to ivacaftor in women with Cystic Fibrosis.AANaNNaNIsAssociated withNaNresponse toNaNin women withDisease:Cystic FibrosisNaNNaNNaN
4981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1448423752Variant Drug Annotation27773592Genotypes AA + AG is associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG.1448423752rs75527207CFTRivacaftor27773592EfficacyyesThe outcome of change in sweat chloride was correlated with change in FEV1 in patients with cystic fibrosis and found to have improved results for both.Genotypes AA + AG is associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG.AA + AGPediatricNaNIsAssociated withincreasedresponse toNaNin people withDisease:Cystic FibrosisNaNGGNaN
5981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449191908Variant Drug Annotation25682022Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1449191908rs75527207CFTRivacaftor25682022Efficacynot statedStudy was an expanded access program targeted at patients with severe lung disease and was not powered to determine efficacy. Majority of patients reported an improvement in FEV following 24 weeks of treatment.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.APediatricNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
6981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449192055Variant Drug Annotation28711222Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1449192055rs75527207CFTRivacaftor28711222EfficacyyesG551D allele. Statistically significant increases in FEV1, weight and BMI and statistically significant decreases in sweat chloride level, the number of days of antibiotic treatment and in the use of some maintenance treatments.; No differences in bone density, pancreatic insufficiency and cystic fibrosis related diabetes were observed.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.APediatricNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
7981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449192093Variant Drug Annotation25311995Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1449192093rs75527207CFTRivacaftor25311995Efficacynot statedG551 D allele. Increases in FEV1, body weight, CFQ-R scores and time to first pulmonary exacerbation were observed.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.APediatricNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
8981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449192439Variant Drug Annotation28611235Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1449192439rs75527207CFTRivacaftor28611235EfficacyyesG551D allele. FEV1, Alfred wellness score, exercise time, CFQ-R score and sweat chloride levels showed a significant improvement following ivacaftor treatment as compared to placebo while other outcomes (VO2, ventilation, cardiac response nd recovery following exercise) did not.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.ANaNNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
9981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449192481Variant Drug Annotation26135562Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1449192481rs75527207CFTRivacaftor26135562EfficacyyesG551D allele. Analysis of CFQ-R scores from participants in the STRIVE trial. Scores for eating problems, health perceptions, physical functioning, respiratory symptoms, social functioning, treatment burden and vitality showed significant improvements following ivacaftor treatment.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.APediatricNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
10981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449192494Variant Drug Annotation25171465Allele A is associated with response to ivacaftor in children with Cystic Fibrosis.1449192494rs75527207CFTRivacaftor25171465Efficacynot statedCase study of a pediatric cystic fibrosis patient. Improvements in sweat chloride, BMI, bronchiectasis and lung function reported following ivacaftor treatment.Allele A is associated with response to ivacaftor in children with Cystic Fibrosis.APediatricNaNIsAssociated withNaNresponse toNaNin children withDisease:Cystic FibrosisNaNNaNNaN
11981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449192576Variant Drug Annotation25755212Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1449192576rs75527207CFTRivacaftor25755212EfficacyyesPost hoc analysis of clinical outcomes of the STRIVE and ENVISION trials. Participants were split into tertiles based on FEV1 score and outcomes in change in baseline FEV1, body weight, CFQ-R score and sweat chloride levels as well as number of days of pulmonary exacerbation were assessed. All outcomes were significantly improved in the upper tertile, all outcomes apart from number of days of pulmonary exacerbation were significantly improved in the middle tertile and absolute change in FEV1, body weight and sweat chloride levels were significantly improved in the lower tertile.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.APediatricNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
12981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449192615Variant Drug Annotation26568242Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1449192615rs75527207CFTRivacaftor26568242EfficacyyesResponse measured by changes in sweat chloride levels, FEV1 and BMI.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.APediatricNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
13981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449192709Variant Drug Annotation25473543Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1449192709rs75527207CFTRivacaftor25473543Efficacynot statedG551D allele. Case report of three patients with the F508del/G551D genotype. Reported improvements in FEV1, body weight, sweat chloride levels and scores in the respiratory domain of the CFQ-R.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.ANaNNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
14981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449192721Variant Drug Annotation25145599Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1449192721rs75527207CFTRivacaftor25145599EfficacyyesG551D allele. Significant increases in %FVC and %FEV1 compared to baseline were seen at 6 months of ivacaftor treatment, but both measures declined to baseline by 12 months of ivacaftor treatment. Significant improvements in BMI, body weight, sinus disease status and sweat chloride levels were seen at 12 months of ivacaftor treatment.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.APediatricNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
15981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1450043422Variant Drug Annotation23628510Allele A is associated with response to ivacaftor in children with Cystic Fibrosis.1450043422rs75527207CFTRivacaftor23628510EfficacyyesNaNAllele A is associated with response to ivacaftor in children with Cystic Fibrosis.APediatricNaNIsAssociated withNaNresponse toNaNin children withDisease:Cystic FibrosisNaNNaNNaN
16981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1184512440Variant Drug Annotation25049054Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1184512440rs75527207CFTRivacaftor25049054EfficacyyesPatients with at least one G551D-CFTR allele were recruited and treated with ivacaftor for one year. Mean weight and BMI improved at 6 months from baseline, but only mean weight was increased again at 12 months. Mean percentage FVC, FEV1 and FEF25-75% returned to baseline levels by 12 months of treatment.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.ANaNNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
17981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis981755746Variant Drug Annotation22942289Allele A is associated with increased response to ivacaftor.981755746rs75527207CFTRivacaftor22942289Efficacynot statedIn vitro studies using proteoliposomes containing CFTR, or CFTR with the G551D mutation (rs75527207 allele A), or CFTR with the F508del mutation (rs113993960 allele del). Ivacaftor in the presence of ATP potentiated channel activity of CFTR-G551D.Allele A is associated with increased response to ivacaftor.ANaNNaNIsAssociated withincreasedresponse toNaNNaNNaNNaNNaNNaN
18981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis981755699Variant Drug Annotation19846789Allele A is associated with increased response to ivacaftor.981755699rs75527207CFTRivacaftor19846789EfficacyyesIn vitro assays that show ivacaftor potentiates CFTR with the G551D mutation (rs75527207 allele A) - see details described in study parameters.Allele A is associated with increased response to ivacaftor.ANaNNaNIsAssociated withincreasedresponse toNaNNaNNaNNaNNaNNaN
19981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis981755787Variant Drug Annotation22293084Allele A is associated with increased response to ivacaftor.981755787rs75527207CFTRivacaftor22293084Efficacyyesas compared to baseline. In vitro assays using transfected Fisher Rat Thyroid cells expressing CFTR. Before treatment, cells were activated by exposure to PKA and ATP before ivacaftor treatment. In vitro assays using transfected Fisher Rat Thyroid cells expressing CFTR. Cells expressing G551D-CFTR (rs75527207 allele A) responded to ivacaftor treatment with a significantly enhanced channel open probability and increased chloride transport. Single channel current amplitude at 80mV was not significantly enhanced.Allele A is associated with increased response to ivacaftor.ANaNNaNIsAssociated withincreasedresponse toNaNNaNNaNNaNNaNNaN
20981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1043737597Variant Drug Annotation23757359Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.1043737597rs75527207CFTRivacaftor23757359EfficacyyesA retrospective study of patients in Germany with severe Cystic Fibrosis (FEV1 <40%predicted) with the G551D mutation who were treated with ivacaftor. On average, FEV1and body weight increased significantly, though response was variable in this patient group and several patients discontinued ivacaftor for different complications.Allele A is associated with response to ivacaftor in people with Cystic Fibrosis.ANaNNaNIsAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
21981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis982006840Variant Drug Annotation23313410Allele A is associated with response to ivacaftor in men with Cystic Fibrosis.982006840rs75527207CFTRivacaftor23313410Efficacynot statedA case report of lung function improvements 6 months after treatment with ivacaftor in a male patient with severe lung disease - he had the CFTR G511D (rsrs75527207 allele A)/deltaF508 genotype (rs113993960 del CTT) and so could be given ivacaftor.Allele A is associated with response to ivacaftor in men with Cystic Fibrosis.ANaNNaNIsAssociated withNaNresponse toNaNin men withDisease:Cystic FibrosisNaNNaNNaN
22981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1446903789Variant Drug Annotation24461666Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis.1446903789rs75527207CFTRivacaftor24461666EfficacyyesThe authors wanted to assess the efficacy of ivacaftor in patients with cystic fibrosis who have normal spirometry. The authors assessed lung function improvement in patients using lung clearance index (LCI) as well as forced expiratory volume in 1 second (FEV1), and only included patients with < 90% FEV1 values. The primary outcome was change in LCI from baseline. This was a phase 2, multi-centre, placebo-controlled, 2x2 crossover study. One group, sequence 1, took placebo first, followed by 28 day washout, then took ivacaftor 150 mg 2x daily for 4 weeks. The second group had the sequence of treatment reversed.Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis.AA + AGPediatricNaNAreAssociated withNaNresponse toNaNin people withDisease:Cystic FibrosisNaNNaNNaN
23981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1448099051Variant Drug Annotation27158673Genotypes AA + AG are associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG.1448099051rs75527207CFTRivacaftor27158673EfficacyyesMeasured in adult patients, with changes in lung volume, sweat chloride, distensibility, wall thickness, expiratory lumen area, and inspiratory lumen area measured before starting ivacaftor and 48 hour after starting ivacaftor.Genotypes AA + AG are associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG.AA + AGNaNNaNAreAssociated withincreasedresponse toNaNin people withDisease:Cystic FibrosisNaNGGNaN
\n", + "
" + ], + "text/plain": [ + " Clinical Annotation ID Variant/Haplotypes Gene Level of Evidence \\\n", + "0 981755803 rs75527207 CFTR 1A \n", + "1 981755803 rs75527207 CFTR 1A \n", + "2 981755803 rs75527207 CFTR 1A \n", + "3 981755803 rs75527207 CFTR 1A \n", + "4 981755803 rs75527207 CFTR 1A \n", + "5 981755803 rs75527207 CFTR 1A \n", + "6 981755803 rs75527207 CFTR 1A \n", + "7 981755803 rs75527207 CFTR 1A \n", + "8 981755803 rs75527207 CFTR 1A \n", + "9 981755803 rs75527207 CFTR 1A \n", + "10 981755803 rs75527207 CFTR 1A \n", + "11 981755803 rs75527207 CFTR 1A \n", + "12 981755803 rs75527207 CFTR 1A \n", + "13 981755803 rs75527207 CFTR 1A \n", + "14 981755803 rs75527207 CFTR 1A \n", + "15 981755803 rs75527207 CFTR 1A \n", + "16 981755803 rs75527207 CFTR 1A \n", + "17 981755803 rs75527207 CFTR 1A \n", + "18 981755803 rs75527207 CFTR 1A \n", + "19 981755803 rs75527207 CFTR 1A \n", + "20 981755803 rs75527207 CFTR 1A \n", + "21 981755803 rs75527207 CFTR 1A \n", + "22 981755803 rs75527207 CFTR 1A \n", + "23 981755803 rs75527207 CFTR 1A \n", + "\n", + " Phenotype Category Drug(s) Phenotype(s) Evidence ID \\\n", + "0 Efficacy ivacaftor Cystic Fibrosis 981755665 \n", + "1 Efficacy ivacaftor Cystic Fibrosis 981755678 \n", + "2 Efficacy ivacaftor Cystic Fibrosis 982009991 \n", + "3 Efficacy ivacaftor Cystic Fibrosis 1183629335 \n", + "4 Efficacy ivacaftor Cystic Fibrosis 1448423752 \n", + "5 Efficacy ivacaftor Cystic Fibrosis 1449191908 \n", + "6 Efficacy ivacaftor Cystic Fibrosis 1449192055 \n", + "7 Efficacy ivacaftor Cystic Fibrosis 1449192093 \n", + "8 Efficacy ivacaftor Cystic Fibrosis 1449192439 \n", + "9 Efficacy ivacaftor Cystic Fibrosis 1449192481 \n", + "10 Efficacy ivacaftor Cystic Fibrosis 1449192494 \n", + "11 Efficacy ivacaftor Cystic Fibrosis 1449192576 \n", + "12 Efficacy ivacaftor Cystic Fibrosis 1449192615 \n", + "13 Efficacy ivacaftor Cystic Fibrosis 1449192709 \n", + "14 Efficacy ivacaftor Cystic Fibrosis 1449192721 \n", + "15 Efficacy ivacaftor Cystic 
Fibrosis 1450043422 \n", + "16 Efficacy ivacaftor Cystic Fibrosis 1184512440 \n", + "17 Efficacy ivacaftor Cystic Fibrosis 981755746 \n", + "18 Efficacy ivacaftor Cystic Fibrosis 981755699 \n", + "19 Efficacy ivacaftor Cystic Fibrosis 981755787 \n", + "20 Efficacy ivacaftor Cystic Fibrosis 1043737597 \n", + "21 Efficacy ivacaftor Cystic Fibrosis 982006840 \n", + "22 Efficacy ivacaftor Cystic Fibrosis 1446903789 \n", + "23 Efficacy ivacaftor Cystic Fibrosis 1448099051 \n", + "\n", + " Evidence Type PMID \\\n", + "0 Variant Drug Annotation 21083385 \n", + "1 Variant Drug Annotation 22047557 \n", + "2 Variant Drug Annotation 23590265 \n", + "3 Variant Drug Annotation 24066763 \n", + "4 Variant Drug Annotation 27773592 \n", + "5 Variant Drug Annotation 25682022 \n", + "6 Variant Drug Annotation 28711222 \n", + "7 Variant Drug Annotation 25311995 \n", + "8 Variant Drug Annotation 28611235 \n", + "9 Variant Drug Annotation 26135562 \n", + "10 Variant Drug Annotation 25171465 \n", + "11 Variant Drug Annotation 25755212 \n", + "12 Variant Drug Annotation 26568242 \n", + "13 Variant Drug Annotation 25473543 \n", + "14 Variant Drug Annotation 25145599 \n", + "15 Variant Drug Annotation 23628510 \n", + "16 Variant Drug Annotation 25049054 \n", + "17 Variant Drug Annotation 22942289 \n", + "18 Variant Drug Annotation 19846789 \n", + "19 Variant Drug Annotation 22293084 \n", + "20 Variant Drug Annotation 23757359 \n", + "21 Variant Drug Annotation 23313410 \n", + "22 Variant Drug Annotation 24461666 \n", + "23 Variant Drug Annotation 27158673 \n", + "\n", + " Summary \\\n", + "0 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "1 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "2 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n", + "3 Genotype AA is associated with response to ivacaftor in women with Cystic Fibrosis. 
\n", + "4 Genotypes AA + AG is associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. \n", + "5 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "6 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "7 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "8 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "9 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "10 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n", + "11 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "12 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "13 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "14 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "15 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n", + "16 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "17 Allele A is associated with increased response to ivacaftor. \n", + "18 Allele A is associated with increased response to ivacaftor. \n", + "19 Allele A is associated with increased response to ivacaftor. \n", + "20 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "21 Allele A is associated with response to ivacaftor in men with Cystic Fibrosis. \n", + "22 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "23 Genotypes AA + AG are associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. 
\n", + "\n", + " Variant Annotation ID Variant/Haplotypes_var_drug Gene_var_drug \\\n", + "0 981755665 rs75527207 CFTR \n", + "1 981755678 rs75527207 CFTR \n", + "2 982009991 rs75527207 CFTR \n", + "3 1183629335 rs75527207 CFTR \n", + "4 1448423752 rs75527207 CFTR \n", + "5 1449191908 rs75527207 CFTR \n", + "6 1449192055 rs75527207 CFTR \n", + "7 1449192093 rs75527207 CFTR \n", + "8 1449192439 rs75527207 CFTR \n", + "9 1449192481 rs75527207 CFTR \n", + "10 1449192494 rs75527207 CFTR \n", + "11 1449192576 rs75527207 CFTR \n", + "12 1449192615 rs75527207 CFTR \n", + "13 1449192709 rs75527207 CFTR \n", + "14 1449192721 rs75527207 CFTR \n", + "15 1450043422 rs75527207 CFTR \n", + "16 1184512440 rs75527207 CFTR \n", + "17 981755746 rs75527207 CFTR \n", + "18 981755699 rs75527207 CFTR \n", + "19 981755787 rs75527207 CFTR \n", + "20 1043737597 rs75527207 CFTR \n", + "21 982006840 rs75527207 CFTR \n", + "22 1446903789 rs75527207 CFTR \n", + "23 1448099051 rs75527207 CFTR \n", + "\n", + " Drug(s)_var_drug PMID_var_drug Phenotype Category_var_drug Significance \\\n", + "0 ivacaftor 21083385 Efficacy not stated \n", + "1 ivacaftor 22047557 Efficacy not stated \n", + "2 ivacaftor 23590265 Efficacy yes \n", + "3 ivacaftor 24066763 Efficacy not stated \n", + "4 ivacaftor 27773592 Efficacy yes \n", + "5 ivacaftor 25682022 Efficacy not stated \n", + "6 ivacaftor 28711222 Efficacy yes \n", + "7 ivacaftor 25311995 Efficacy not stated \n", + "8 ivacaftor 28611235 Efficacy yes \n", + "9 ivacaftor 26135562 Efficacy yes \n", + "10 ivacaftor 25171465 Efficacy not stated \n", + "11 ivacaftor 25755212 Efficacy yes \n", + "12 ivacaftor 26568242 Efficacy yes \n", + "13 ivacaftor 25473543 Efficacy not stated \n", + "14 ivacaftor 25145599 Efficacy yes \n", + "15 ivacaftor 23628510 Efficacy yes \n", + "16 ivacaftor 25049054 Efficacy yes \n", + "17 ivacaftor 22942289 Efficacy not stated \n", + "18 ivacaftor 19846789 Efficacy yes \n", + "19 ivacaftor 22293084 Efficacy yes \n", + "20 ivacaftor 
23757359 Efficacy yes \n", + "21 ivacaftor 23313410 Efficacy not stated \n", + "22 ivacaftor 24461666 Efficacy yes \n", + "23 ivacaftor 27158673 Efficacy yes \n", + "\n", + " Notes \\\n", + "0 Clinical trials were carried out to test efficacy of ivacaftor selecting only patients with the CFTR G551D mutation on at least one allele (genotype AA or AG). \n", + "1 A clinical trial that selected patients with the G551D CFTR mutation (rs75527207 genotype AA or AG). Patients without this mutation were excluded. One patient included in the placebo group was homozygous for F508del (rs113993960 genotype del/del). \n", + "2 Patients aged 6-11 at time of screening who had at least one allele with the G551D mutation (allele A at position rs75527207) were recruited for this trial. Ivacaftor is only indicated in CF patients with this mutation. Significant improvements in lung function were seen in the ivacaftor treatment group compared to placebo. \n", + "3 Case report of a female homozygous for the G551D CFTR mutation (genotype AA) in which ivacaftor was efficacious: increased absolute change in percent of predicted FEV1, increased weight and walk distance and decreased sweat chloride levels over a 12 month course with no sign of plateau to date. \n", + "4 The outcome of change in sweat chloride was correlated with change in FEV1 in patients with cystic fibrosis and found to have improved results for both. \n", + "5 Study was an expanded access program targeted at patients with severe lung disease and was not powered to determine efficacy. Majority of patients reported an improvement in FEV following 24 weeks of treatment. \n", + "6 G551D allele. Statistically significant increases in FEV1, weight and BMI and statistically significant decreases in sweat chloride level, the number of days of antibiotic treatment and in the use of some maintenance treatments.; No differences in bone density, pancreatic insufficiency and cystic fibrosis related diabetes were observed. 
\n", + "7 G551 D allele. Increases in FEV1, body weight, CFQ-R scores and time to first pulmonary exacerbation were observed. \n", + "8 G551D allele. FEV1, Alfred wellness score, exercise time, CFQ-R score and sweat chloride levels showed a significant improvement following ivacaftor treatment as compared to placebo while other outcomes (VO2, ventilation, cardiac response nd recovery following exercise) did not. \n", + "9 G551D allele. Analysis of CFQ-R scores from participants in the STRIVE trial. Scores for eating problems, health perceptions, physical functioning, respiratory symptoms, social functioning, treatment burden and vitality showed significant improvements following ivacaftor treatment. \n", + "10 Case study of a pediatric cystic fibrosis patient. Improvements in sweat chloride, BMI, bronchiectasis and lung function reported following ivacaftor treatment. \n", + "11 Post hoc analysis of clinical outcomes of the STRIVE and ENVISION trials. Participants were split into tertiles based on FEV1 score and outcomes in change in baseline FEV1, body weight, CFQ-R score and sweat chloride levels as well as number of days of pulmonary exacerbation were assessed. All outcomes were significantly improved in the upper tertile, all outcomes apart from number of days of pulmonary exacerbation were significantly improved in the middle tertile and absolute change in FEV1, body weight and sweat chloride levels were significantly improved in the lower tertile. \n", + "12 Response measured by changes in sweat chloride levels, FEV1 and BMI. \n", + "13 G551D allele. Case report of three patients with the F508del/G551D genotype. Reported improvements in FEV1, body weight, sweat chloride levels and scores in the respiratory domain of the CFQ-R. \n", + "14 G551D allele. Significant increases in %FVC and %FEV1 compared to baseline were seen at 6 months of ivacaftor treatment, but both measures declined to baseline by 12 months of ivacaftor treatment. 
Significant improvements in BMI, body weight, sinus disease status and sweat chloride levels were seen at 12 months of ivacaftor treatment. \n", + "15 NaN \n", + "16 Patients with at least one G551D-CFTR allele were recruited and treated with ivacaftor for one year. Mean weight and BMI improved at 6 months from baseline, but only mean weight was increased again at 12 months. Mean percentage FVC, FEV1 and FEF25-75% returned to baseline levels by 12 months of treatment. \n", + "17 In vitro studies using proteoliposomes containing CFTR, or CFTR with the G551D mutation (rs75527207 allele A), or CFTR with the F508del mutation (rs113993960 allele del). Ivacaftor in the presence of ATP potentiated channel activity of CFTR-G551D. \n", + "18 In vitro assays that show ivacaftor potentiates CFTR with the G551D mutation (rs75527207 allele A) - see details described in study parameters. \n", + "19 as compared to baseline. In vitro assays using transfected Fisher Rat Thyroid cells expressing CFTR. Before treatment, cells were activated by exposure to PKA and ATP before ivacaftor treatment. In vitro assays using transfected Fisher Rat Thyroid cells expressing CFTR. Cells expressing G551D-CFTR (rs75527207 allele A) responded to ivacaftor treatment with a significantly enhanced channel open probability and increased chloride transport. Single channel current amplitude at 80mV was not significantly enhanced. \n", + "20 A retrospective study of patients in Germany with severe Cystic Fibrosis (FEV1 <40%predicted) with the G551D mutation who were treated with ivacaftor. On average, FEV1and body weight increased significantly, though response was variable in this patient group and several patients discontinued ivacaftor for different complications. 
\n", + "21 A case report of lung function improvements 6 months after treatment with ivacaftor in a male patient with severe lung disease - he had the CFTR G511D (rsrs75527207 allele A)/deltaF508 genotype (rs113993960 del CTT) and so could be given ivacaftor. \n", + "22 The authors wanted to assess the efficacy of ivacaftor in patients with cystic fibrosis who have normal spirometry. The authors assessed lung function improvement in patients using lung clearance index (LCI) as well as forced expiratory volume in 1 second (FEV1), and only included patients with < 90% FEV1 values. The primary outcome was change in LCI from baseline. This was a phase 2, multi-centre, placebo-controlled, 2x2 crossover study. One group, sequence 1, took placebo first, followed by 28 day washout, then took ivacaftor 150 mg 2x daily for 4 weeks. The second group had the sequence of treatment reversed. \n", + "23 Measured in adult patients, with changes in lung volume, sweat chloride, distensibility, wall thickness, expiratory lumen area, and inspiratory lumen area measured before starting ivacaftor and 48 hour after starting ivacaftor. \n", + "\n", + " Sentence \\\n", + "0 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "1 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "2 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n", + "3 Genotype AA is associated with response to ivacaftor in women with Cystic Fibrosis. \n", + "4 Genotypes AA + AG is associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. \n", + "5 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "6 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "7 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. 
\n", + "8 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "9 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "10 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n", + "11 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "12 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "13 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "14 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "15 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n", + "16 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "17 Allele A is associated with increased response to ivacaftor. \n", + "18 Allele A is associated with increased response to ivacaftor. \n", + "19 Allele A is associated with increased response to ivacaftor. \n", + "20 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "21 Allele A is associated with response to ivacaftor in men with Cystic Fibrosis. \n", + "22 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n", + "23 Genotypes AA + AG are associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. 
\n", + "\n", + " Alleles Specialty Population Metabolizer types isPlural \\\n", + "0 AA + AG NaN NaN Are \n", + "1 AA + AG NaN NaN Are \n", + "2 A Pediatric NaN Is \n", + "3 AA NaN NaN Is \n", + "4 AA + AG Pediatric NaN Is \n", + "5 A Pediatric NaN Is \n", + "6 A Pediatric NaN Is \n", + "7 A Pediatric NaN Is \n", + "8 A NaN NaN Is \n", + "9 A Pediatric NaN Is \n", + "10 A Pediatric NaN Is \n", + "11 A Pediatric NaN Is \n", + "12 A Pediatric NaN Is \n", + "13 A NaN NaN Is \n", + "14 A Pediatric NaN Is \n", + "15 A Pediatric NaN Is \n", + "16 A NaN NaN Is \n", + "17 A NaN NaN Is \n", + "18 A NaN NaN Is \n", + "19 A NaN NaN Is \n", + "20 A NaN NaN Is \n", + "21 A NaN NaN Is \n", + "22 AA + AG Pediatric NaN Are \n", + "23 AA + AG NaN NaN Are \n", + "\n", + " Is/Is Not associated Direction of effect PD/PK terms \\\n", + "0 Associated with NaN response to \n", + "1 Associated with NaN response to \n", + "2 Associated with NaN response to \n", + "3 Associated with NaN response to \n", + "4 Associated with increased response to \n", + "5 Associated with NaN response to \n", + "6 Associated with NaN response to \n", + "7 Associated with NaN response to \n", + "8 Associated with NaN response to \n", + "9 Associated with NaN response to \n", + "10 Associated with NaN response to \n", + "11 Associated with NaN response to \n", + "12 Associated with NaN response to \n", + "13 Associated with NaN response to \n", + "14 Associated with NaN response to \n", + "15 Associated with NaN response to \n", + "16 Associated with NaN response to \n", + "17 Associated with increased response to \n", + "18 Associated with increased response to \n", + "19 Associated with increased response to \n", + "20 Associated with NaN response to \n", + "21 Associated with NaN response to \n", + "22 Associated with NaN response to \n", + "23 Associated with increased response to \n", + "\n", + " Multiple drugs And/or Population types Population Phenotypes or diseases \\\n", + "0 NaN in people with 
Disease:Cystic Fibrosis \n", + "1 NaN in people with Disease:Cystic Fibrosis \n", + "2 NaN in children with Disease:Cystic Fibrosis \n", + "3 NaN in women with Disease:Cystic Fibrosis \n", + "4 NaN in people with Disease:Cystic Fibrosis \n", + "5 NaN in people with Disease:Cystic Fibrosis \n", + "6 NaN in people with Disease:Cystic Fibrosis \n", + "7 NaN in people with Disease:Cystic Fibrosis \n", + "8 NaN in people with Disease:Cystic Fibrosis \n", + "9 NaN in people with Disease:Cystic Fibrosis \n", + "10 NaN in children with Disease:Cystic Fibrosis \n", + "11 NaN in people with Disease:Cystic Fibrosis \n", + "12 NaN in people with Disease:Cystic Fibrosis \n", + "13 NaN in people with Disease:Cystic Fibrosis \n", + "14 NaN in people with Disease:Cystic Fibrosis \n", + "15 NaN in children with Disease:Cystic Fibrosis \n", + "16 NaN in people with Disease:Cystic Fibrosis \n", + "17 NaN NaN NaN \n", + "18 NaN NaN NaN \n", + "19 NaN NaN NaN \n", + "20 NaN in people with Disease:Cystic Fibrosis \n", + "21 NaN in men with Disease:Cystic Fibrosis \n", + "22 NaN in people with Disease:Cystic Fibrosis \n", + "23 NaN in people with Disease:Cystic Fibrosis \n", + "\n", + " Multiple phenotypes or diseases And/or Comparison Allele(s) or Genotype(s) \\\n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN GG \n", + "5 NaN NaN \n", + "6 NaN NaN \n", + "7 NaN NaN \n", + "8 NaN NaN \n", + "9 NaN NaN \n", + "10 NaN NaN \n", + "11 NaN NaN \n", + "12 NaN NaN \n", + "13 NaN NaN \n", + "14 NaN NaN \n", + "15 NaN NaN \n", + "16 NaN NaN \n", + "17 NaN NaN \n", + "18 NaN NaN \n", + "19 NaN NaN \n", + "20 NaN NaN \n", + "21 NaN NaN \n", + "22 NaN NaN \n", + "23 NaN GG \n", + "\n", + " Comparison Metabolizer types \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "5 NaN \n", + "6 NaN \n", + "7 NaN \n", + "8 NaN \n", + "9 NaN \n", + "10 NaN \n", + "11 NaN \n", + "12 NaN \n", + "13 NaN \n", + "14 NaN \n", + "15 NaN \n", + "16 NaN \n", 
+ "17 NaN \n", + "18 NaN \n", + "19 NaN \n", + "20 NaN \n", + "21 NaN \n", + "22 NaN \n", + "23 NaN " + ] + }, + "execution_count": 248, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_981755803_drug" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "id": "86225d03-93ea-4d22-8569-0bb1360f4d66", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Clinical Annotation IDVariant/HaplotypesGeneLevel of EvidencePhenotype CategoryDrug(s)Phenotype(s)Evidence IDEvidence TypePMIDSummaryVariant Annotation IDVariant/Haplotypes_var_faGene_var_faDrug(s)_var_faPMID_var_faPhenotype Category_var_faSignificanceNotesSentenceAllelesSpecialty PopulationAssay typeMetabolizer typesisPluralIs/Is Not associatedDirection of effectFunctional termsGene/gene productWhen treated with/exposed to/when assayed withMultiple drugs And/orCell typeComparison Allele(s) or Genotype(s)Comparison Metabolizer types
0981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1043737620Variant Functional Assay Annotation23757361Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells.1043737620rs75527207CFTRivacaftor23757361Efficacyyescompared to no treatment. Ivacaftor stimulated CFTR activity in CFTR-G551D expressing CHO cells (as measured by iodine efflux).Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells.ANaNNaNNaNIsAssociated withincreasedactivity ofCFTRwhen treated withNaNin transfected CHO cellsNaNNaN
1981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1043737636Variant Functional Assay Annotation23891399Allele A is associated with activity of CFTR when treated with ivacaftor in FRT cell lines.1043737636rs75527207CFTRivacaftor23891399EfficacyyesG551D allele. 55.3 fold increase in chloride transport upon ivacaftor treatment as compared to baseline (no ivacaftor treatment).Allele A is associated with activity of CFTR when treated with ivacaftor in FRT cell lines.ANaNNaNNaNIsAssociated withNaNactivity ofCFTRwhen treated withNaNin FRT cell linesNaNNaN
\n", + "
" + ], + "text/plain": [ + " Clinical Annotation ID Variant/Haplotypes Gene Level of Evidence \\\n", + "0 981755803 rs75527207 CFTR 1A \n", + "1 981755803 rs75527207 CFTR 1A \n", + "\n", + " Phenotype Category Drug(s) Phenotype(s) Evidence ID \\\n", + "0 Efficacy ivacaftor Cystic Fibrosis 1043737620 \n", + "1 Efficacy ivacaftor Cystic Fibrosis 1043737636 \n", + "\n", + " Evidence Type PMID \\\n", + "0 Variant Functional Assay Annotation 23757361 \n", + "1 Variant Functional Assay Annotation 23891399 \n", + "\n", + " Summary \\\n", + "0 Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells. \n", + "1 Allele A is associated with activity of CFTR when treated with ivacaftor in FRT cell lines. \n", + "\n", + " Variant Annotation ID Variant/Haplotypes_var_fa Gene_var_fa Drug(s)_var_fa \\\n", + "0 1043737620 rs75527207 CFTR ivacaftor \n", + "1 1043737636 rs75527207 CFTR ivacaftor \n", + "\n", + " PMID_var_fa Phenotype Category_var_fa Significance \\\n", + "0 23757361 Efficacy yes \n", + "1 23891399 Efficacy yes \n", + "\n", + " Notes \\\n", + "0 compared to no treatment. Ivacaftor stimulated CFTR activity in CFTR-G551D expressing CHO cells (as measured by iodine efflux). \n", + "1 G551D allele. 55.3 fold increase in chloride transport upon ivacaftor treatment as compared to baseline (no ivacaftor treatment). \n", + "\n", + " Sentence \\\n", + "0 Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells. \n", + "1 Allele A is associated with activity of CFTR when treated with ivacaftor in FRT cell lines. 
\n", + "\n", + " Alleles Specialty Population Assay type Metabolizer types isPlural \\\n", + "0 A NaN NaN NaN Is \n", + "1 A NaN NaN NaN Is \n", + "\n", + " Is/Is Not associated Direction of effect Functional terms Gene/gene product \\\n", + "0 Associated with increased activity of CFTR \n", + "1 Associated with NaN activity of CFTR \n", + "\n", + " When treated with/exposed to/when assayed with Multiple drugs And/or \\\n", + "0 when treated with NaN \n", + "1 when treated with NaN \n", + "\n", + " Cell type Comparison Allele(s) or Genotype(s) \\\n", + "0 in transfected CHO cells NaN \n", + "1 in FRT cell lines NaN \n", + "\n", + " Comparison Metabolizer types \n", + "0 NaN \n", + "1 NaN " + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_981755803_fa" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "49c52d4d-8c99-4788-a922-310ac566b4f6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Clinical Annotation IDVariant/HaplotypesGeneLevel of EvidencePhenotype CategoryDrug(s)Phenotype(s)Evidence IDEvidence TypePMIDSummaryVariant Annotation IDVariant/Haplotypes_var_phenoGene_var_phenoDrug(s)_var_phenoPMID_var_phenoPhenotype Category_var_phenoSignificanceNotesSentenceAllelesSpecialty PopulationMetabolizer typesisPluralIs/Is Not associatedDirection of effectSide effect/efficacy/otherPhenotypeMultiple phenotypes And/orWhen treated with/exposed to/when assayed withMultiple drugs And/orPopulation typesPopulation Phenotypes or diseasesMultiple phenotypes or diseases And/orComparison Allele(s) or Genotype(s)Comparison Metabolizer types
0981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1448267532Variant Phenotype Annotation27745802Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG.1448267532rs75527207CFTRivacaftor27745802EfficacyyesBone mineral density compared before and after 1 year of treatment with ivacaftor using dual energy X-ray absorptiometry at the L2-L4 lumbar spine. All patients were pancreatic insufficient.Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG.AA + AGNaNNaNIsAssociated withdecreasedseverity ofSide Effect:bone densityandwhen treated withNaNin people withDisease:Cystic FibrosisNaNGGNaN
1981755803rs75527207CFTR1AEfficacyivacaftorCystic Fibrosis1449192031Variant Phenotype Annotation28651844Allele A is associated with decreased likelihood of cystic fibrosis pulmonary exacerbation when treated with ivacaftor in people with Cystic Fibrosis.1449192031rs75527207CFTRivacaftor28651844EfficacyyesG551D allele. Patients receiving ivacaftor treatment had a reduced rate of pulmonary exacerbation events compared to patients receiving a placebo.Allele A is associated with decreased likelihood of cystic fibrosis pulmonary exacerbation when treated with ivacaftor in people with Cystic Fibrosis.APediatricNaNIsAssociated withdecreasedlikelihood ofDisease:cystic fibrosis pulmonary exacerbationandwhen treated withNaNin people withDisease:Cystic FibrosisNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " Clinical Annotation ID Variant/Haplotypes Gene Level of Evidence \\\n", + "0 981755803 rs75527207 CFTR 1A \n", + "1 981755803 rs75527207 CFTR 1A \n", + "\n", + " Phenotype Category Drug(s) Phenotype(s) Evidence ID \\\n", + "0 Efficacy ivacaftor Cystic Fibrosis 1448267532 \n", + "1 Efficacy ivacaftor Cystic Fibrosis 1449192031 \n", + "\n", + " Evidence Type PMID \\\n", + "0 Variant Phenotype Annotation 27745802 \n", + "1 Variant Phenotype Annotation 28651844 \n", + "\n", + " Summary \\\n", + "0 Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG. \n", + "1 Allele A is associated with decreased likelihood of cystic fibrosis pulmonary exacerbation when treated with ivacaftor in people with Cystic Fibrosis. \n", + "\n", + " Variant Annotation ID Variant/Haplotypes_var_pheno Gene_var_pheno \\\n", + "0 1448267532 rs75527207 CFTR \n", + "1 1449192031 rs75527207 CFTR \n", + "\n", + " Drug(s)_var_pheno PMID_var_pheno Phenotype Category_var_pheno Significance \\\n", + "0 ivacaftor 27745802 Efficacy yes \n", + "1 ivacaftor 28651844 Efficacy yes \n", + "\n", + " Notes \\\n", + "0 Bone mineral density compared before and after 1 year of treatment with ivacaftor using dual energy X-ray absorptiometry at the L2-L4 lumbar spine. All patients were pancreatic insufficient. \n", + "1 G551D allele. Patients receiving ivacaftor treatment had a reduced rate of pulmonary exacerbation events compared to patients receiving a placebo. \n", + "\n", + " Sentence \\\n", + "0 Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG. \n", + "1 Allele A is associated with decreased likelihood of cystic fibrosis pulmonary exacerbation when treated with ivacaftor in people with Cystic Fibrosis. 
\n", + "\n", + " Alleles Specialty Population Metabolizer types isPlural \\\n", + "0 AA + AG NaN NaN Is \n", + "1 A Pediatric NaN Is \n", + "\n", + " Is/Is Not associated Direction of effect Side effect/efficacy/other \\\n", + "0 Associated with decreased severity of \n", + "1 Associated with decreased likelihood of \n", + "\n", + " Phenotype Multiple phenotypes And/or \\\n", + "0 Side Effect:bone density and \n", + "1 Disease:cystic fibrosis pulmonary exacerbation and \n", + "\n", + " When treated with/exposed to/when assayed with Multiple drugs And/or \\\n", + "0 when treated with NaN \n", + "1 when treated with NaN \n", + "\n", + " Population types Population Phenotypes or diseases \\\n", + "0 in people with Disease:Cystic Fibrosis \n", + "1 in people with Disease:Cystic Fibrosis \n", + "\n", + " Multiple phenotypes or diseases And/or Comparison Allele(s) or Genotype(s) \\\n", + "0 NaN GG \n", + "1 NaN NaN \n", + "\n", + " Comparison Metabolizer types \n", + "0 NaN \n", + "1 NaN " + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_981755803_pheno" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "08abfc16-b970-4f45-96c1-5e1584ba9a0e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "28" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Comparing number of PMIDs vs. 
number of evidence\n", + "len(set(df_981755803_drug['PMID']) | set(df_981755803_pheno['PMID']) | set(df_981755803_fa['PMID']))" + ] + }, + { + "cell_type": "markdown", + "id": "e5d40be8-65a0-487b-a175-d98aa502aaeb", + "metadata": {}, + "source": [ + "#### Observations so far\n", + "Clinical annotation [981755803](https://www.pharmgkb.org/clinicalAnnotation/981755803) has 30 pieces of supporting evidence:\n", + "* 24 variant/drug annotations\n", + "* 2 variant/functional assay annotations\n", + "* 2 variant/phenotype annotations\n", + "* 2 others (drug labels & guidelines, present in another data download so not included here)\n", + "\n", + "Each variant annotation is associated with a PMID; these are 1:1 (at least in this example).\n", + "* We should think about whether we want to preserve the PMID & evidence associations.\n", + "\n", + "These annotations seem much more specific than the clinical annotations, e.g.\n", + "* they distinguish between \"disease\" and \"side effect\" (check how often)\n", + "* if there are multiple phenotypes or drugs, they specify whether these should be \"and\"s or \"or\"s\n", + "\n", + "These annotations are specific to one or more alleles or genotypes, so we will need to associate them accordingly.\n", + "\n", + "It would be good if we could select the relevant columns from the 3 variant annotation tables and merge them into a unified representation, so we don't have to manage them separately in the pipelines or in the UI.\n" + ] + }, + { + "cell_type": "markdown", + "id": "85e68502-9717-40bd-989a-e9aa7376879d", + "metadata": {}, + "source": [ + "## Coverage\n", + "\n", + "[Top of page](#Table-of-contents)\n", + "\n", + "How many annotations have evidence, and how many have direction of effect specifically?" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "57d1856b-6b82-4084-9def-5c95db9cdfef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5111" + ] + }, + "execution_count": 97, + "metadata": {}, +
"output_type": "execute_result" + } + ], + "source": [ + "len(clinical_annotations)" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "id": "23b76684-1ceb-4dd5-b95a-931a57fbcafc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "15129" + ] + }, + "execution_count": 129, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Exploded on evidence - average 3 per annotation\n", + "len(main_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "8d92bd51-114f-4b9f-961e-3afcc9084560", + "metadata": {}, + "outputs": [], + "source": [ + "# Add all the var annotation tables - this will be a huge mess\n", + "main_with_var = pd.merge(main_df, var_drug_ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='left', suffixes=(None, '_var_drug'))\n", + "main_with_var = pd.merge(main_with_var, var_pheno_ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='left', suffixes=(None, '_var_pheno'))\n", + "main_with_var = pd.merge(main_with_var, var_fa_ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='left', suffixes=(None, '_var_fa'))" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "67228cc8-68e9-4265-997d-047a31458c7d", + "metadata": {}, + "outputs": [], + "source": [ + "ca_with_var_evidence = set(main_with_var[main_with_var['Sentence'].notna() | main_with_var['Sentence_var_pheno'].notna() | main_with_var['Sentence_var_fa'].notna()][ID_COL_NAME])" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "id": "84438d2c-21c2-4411-bbda-742a2f3e94da", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5111" + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Every clinical annotation has at least one variant annotation as supporting evidence\n", + "len(ca_with_var_evidence)" + ] + }, + { + "cell_type": "code", + "execution_count": 205, + "id": 
"3dee0e63-6bfa-4c59-9838-cc129e4aacad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Evidence from var/drug 2435\n", + "Evidence from var/pheno 2958\n", + "Evidence from var/fa 418\n" + ] + } + ], + "source": [ + "print('Evidence from var/drug', len(set(main_with_var[main_with_var['Sentence'].notna()][ID_COL_NAME])))\n", + "print('Evidence from var/pheno', len(set(main_with_var[main_with_var['Sentence_var_pheno'].notna()][ID_COL_NAME])))\n", + "print('Evidence from var/fa', len(set(main_with_var[main_with_var['Sentence_var_fa'].notna()][ID_COL_NAME])))" + ] + }, + { + "cell_type": "code", + "execution_count": 196, + "id": "38006cda-cedf-44d2-9d72-50ec1baf922e", + "metadata": {}, + "outputs": [], + "source": [ + "def main_with_var_where_notna(common_col_name):\n", + " # Filter main_with_var on non-na columns that are common to all three variant annotation tables\n", + " return main_with_var[\n", + " main_with_var[common_col_name].notna()\n", + " | main_with_var[f'{common_col_name}_var_pheno'].notna()\n", + " | main_with_var[f'{common_col_name}_var_fa'].notna()\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 197, + "id": "3bea97eb-cbaa-4cc6-873a-33f240140d60", + "metadata": {}, + "outputs": [], + "source": [ + "def main_with_var_values_in(common_col_name):\n", + " # Return set of values in given column, common to all three variant annotation tables\n", + " return (\n", + " set(main_with_var[common_col_name]) \n", + " | set(main_with_var[f'{common_col_name}_var_pheno']) \n", + " | set(main_with_var[f'{common_col_name}_var_fa'])\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 231, + "id": "a9814220-d767-4e1e-b3ca-ea27b61a5777", + "metadata": {}, + "outputs": [], + "source": [ + "ca_with_doe_evidence = set(main_with_var_where_notna('Direction of effect')[ID_COL_NAME])" + ] + }, + { + "cell_type": "code", + "execution_count": 232, + "id": "3be7753e-902e-47ec-985e-e2b08f70158d", 
+ "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4917" + ] + }, + "execution_count": 232, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Most contain some kind of direction of effect info\n", + "len(ca_with_doe_evidence)" + ] + }, + { + "cell_type": "code", + "execution_count": 230, + "id": "05a195e7-42ec-4b26-a8b0-d7aa0f463053", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.9620426531011543" + ] + }, + "execution_count": 230, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "4917 / 5111" + ] + }, + { + "cell_type": "code", + "execution_count": 234, + "id": "05435327-2590-4018-abdd-e3afe14632a4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total 15129\n", + "With variant annotation 14658\n", + "With PMID 14658\n", + "With allele 14248\n", + "With comparison allele 12464\n" + ] + } + ], + "source": [ + "print('Total', len(main_with_var)) # i.e. 
clinical annotations exploded by evidence id\n", + "print('With variant annotation', len(main_with_var_where_notna('Variant Annotation ID')))\n", + "print('With PMID', len(main_with_var_where_notna('PMID')))\n", + "print('With allele', len(main_with_var_where_notna('Alleles')))\n", + "print('With comparison allele', len(main_with_var_where_notna('Comparison Allele(s) or Genotype(s)')))" + ] + }, + { + "cell_type": "code", + "execution_count": 199, + "id": "712f2365-fb6a-4c12-b73f-167756358e48", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "27427" + ] + }, + "execution_count": 199, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Suddenly worried about counts\n", + "# All variant annotation IDs in all three tables\n", + "len(set(var_drug_ann['Variant Annotation ID']) | set(var_fa_ann['Variant Annotation ID']) | set(var_pheno_ann['Variant Annotation ID']))" + ] + }, + { + "cell_type": "code", + "execution_count": 200, + "id": "fbae3775-37f2-42e2-a639-5596bfab2dad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13783" + ] + }, + "execution_count": 200, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# All evidence IDs - includes variant annotations and drug labels\n", + "len(set(main_df['Evidence ID']))" + ] + }, + { + "cell_type": "code", + "execution_count": 180, + "id": "b55fcd03-ab2d-4cc4-8c84-4c8100abc2ee", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "13778" + ] + }, + "execution_count": 180, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "all_var_ann_ids = set(var_drug_ann['Variant Annotation ID']) | set(var_fa_ann['Variant Annotation ID']) | set(var_pheno_ann['Variant Annotation ID'])\n", + "all_ev_ids = set(main_df['Evidence ID'])\n", + "\n", + "# Not all variant annotation evidence is used\n", + "len(all_var_ann_ids - all_ev_ids)" + ] + }, + { + "cell_type": "markdown", + "id": 
"98c401bc-d920-405f-90a1-93016c7ed9ae", + "metadata": {}, + "source": [ + "#### Observations so far:\n", + "* Every clinical annotation has at least one variant annotation as supporting evidence\n", + "* Most contain some kind of direction of effect info (i.e. in one of the three tables)\n", + " * => coverage is good, assuming we care about all three types of effects\n", + "* Selecting one table covers at most about half of the clinical annotations\n", + "* Not all variant annotation evidence is included in a clinical annotation" + ] + }, + { + "cell_type": "markdown", + "id": "dc4b1bfd-a8ab-4215-832d-5a6aa177470c", + "metadata": {}, + "source": [ + "## Direction of effect\n", + "\n", + "[Top of page](#Table-of-contents)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "01ff8ed9-bdc2-454e-9a68-c9530b9f5de8", + "metadata": {}, + "outputs": [], + "source": [ + "# Trying to make sense of the columns - output suppressed for brevity\n", + "main_with_var.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "id": "126d1762-ae36-4003-989d-c28d96b78e12", + "metadata": {}, + "outputs": [], + "source": [ + "all_var_ann_cols = set(var_drug_ann.columns) | set(var_fa_ann.columns) | set(var_pheno_ann.columns)\n", + "common_var_ann_cols = set(var_drug_ann.columns) & set(var_fa_ann.columns) & set(var_pheno_ann.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "id": "86c927dd-2dfc-4957-9afb-50eb7334d213", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Alleles',\n", + " 'Comparison Allele(s) or Genotype(s)',\n", + " 'Comparison Metabolizer types',\n", + " 'Direction of effect',\n", + " 'Drug(s)',\n", + " 'Gene',\n", + " 'Is/Is Not associated',\n", + " 'Metabolizer types',\n", + " 'Multiple drugs And/or',\n", + " 'Notes',\n", + " 'PMID',\n", + " 'Phenotype Category',\n", + " 'Sentence',\n", + " 'Significance',\n", + " 'Specialty Population',\n", + " 'Variant Annotation ID',\n", + " 
'Variant/Haplotypes',\n", + " 'isPlural'}" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "common_var_ann_cols" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "id": "44b41088-2162-4138-a719-1de7e4c3c156", + "metadata": {}, + "outputs": [], + "source": [ + "unique_var_ann_cols = all_var_ann_cols - common_var_ann_cols" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "id": "4f19f44b-d472-4b1b-ae13-1faa8a3ea5b9", + "metadata": {}, + "outputs": [], + "source": [ + "# annotate with origin table\n", + "annotated_unique_var_ann_cols = {'drug':[], 'fa':[], 'pheno':[]}\n", + "for c in unique_var_ann_cols:\n", + " if c in var_drug_ann.columns:\n", + " annotated_unique_var_ann_cols['drug'].append(c)\n", + " if c in var_fa_ann.columns:\n", + " annotated_unique_var_ann_cols['fa'].append(c)\n", + " if c in var_pheno_ann.columns:\n", + " annotated_unique_var_ann_cols['pheno'].append(c)" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "id": "888398bc-00ce-45bd-af1b-0053c47ba3c3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'drug': ['Multiple phenotypes or diseases And/or',\n", + " 'Population types',\n", + " 'Population Phenotypes or diseases',\n", + " 'PD/PK terms'],\n", + " 'fa': ['Cell type',\n", + " 'Functional terms',\n", + " 'When treated with/exposed to/when assayed with',\n", + " 'Gene/gene product',\n", + " 'Assay type'],\n", + " 'pheno': ['Multiple phenotypes or diseases And/or',\n", + " 'Side effect/efficacy/other',\n", + " 'Multiple phenotypes And/or',\n", + " 'When treated with/exposed to/when assayed with',\n", + " 'Population types',\n", + " 'Population Phenotypes or diseases',\n", + " 'Phenotype']}" + ] + }, + "execution_count": 137, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "annotated_unique_var_ann_cols" + ] + }, + { + "cell_type": "markdown", + "id": "0a3b9cbd-c13e-4e98-831a-aa508fc69d10", 
+ "metadata": {}, + "source": [ + "#### Sentence breakdown\n", + "\n", + "[Top of page](#Table-of-contents)\n", + "\n", + "* Population phenotype always goes with \"multiple phenotypes and/or\" (note functional assay doesn't mention phenotype - I guess there's no multiples in the gene product?)\n", + " * where does the main table phenotype come from? OT is overwriting this now but I'm still confused\n", + "* Drug (main table) always goes with \"multiple drugs and/or\"\n", + "* Comparison alleles in theory I guess tells us something about the reference or baseline, not always present though\n", + "\n", + "Sentence examples:\n", + "* **drug**: \"Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis.\"\n", + " * alleles = \"AA + AG\"\n", + " * direction of effect = [none]\n", + " * pd/pk term = \"response to\"\n", + " * drug = \"ivacaftor\"\n", + " * population types = \"people\"\n", + " * population phenotype = \"cystic fibrosis\"\n", + " * comparison alleles/genotypes = [none]\n", + "* **fa**: \"Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells.\"\n", + " * alleles = \"A\"\n", + " * direction of effect = \"increased\"\n", + " * functional term = \"activity of\"\n", + " * gene/gene product = \"CFTR\"\n", + " * when treated with/exposed to/assayed with = \"when treated with\"\n", + " * drug = \"ivacaftor\"\n", + " * cell type = \"transfected CHO cells\"\n", + " * comparison alleles/genotypes = [none]\n", + "* **pheno**: \"Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG.\"\n", + " * alleles = \"AA + AG\"\n", + " * direction of effect = \"decreased\"\n", + " * side effect/efficacy/other = \"severity of\"\n", + " * phenotype = \"bone density\" **< note distinction between this and population phenotype**\n", + " * when treated with/exposed to/assayed with = \"when 
treated with\"\n", + " * drug = \"ivacaftor\"\n", + " * population types = \"people\"\n", + " * population phenotype = \"cystic fibrosis\"\n", + " * comparison alleles/genotypes = \"GG\"\n", + "\n", + "For the simplest direction of effect annotation (i.e. not including the population, cell type, etc.), I think we only strictly need the following:\n", + "1. direction of effect\n", + "2. pd/pk term | functional term | side effect/efficacy/other\n", + "3. drug | gene/gene product | phenotype\n", + "\n", + "This tells us the direction (1) and what the effect is (2&3).\n", + "Of course we also need the alleles to associate with the appropriate evidence string, maybe also the comparison alleles/genotypes when present, and the \"is/is not associated\" column so we don't report negative results (unless we want to).\n", + "\n", + "Maybe also just the origin of the evidence (variant/drug, variant/phenotype, or functional analysis) is useful." + ] + }, + { + "cell_type": "markdown", + "id": "1b212421-daaf-4ffb-bcc4-66a2afb94794", + "metadata": {}, + "source": [ + "#### Vocabulary\n", + "\n", + "[Top of page](#Table-of-contents)\n", + "\n", + "Check some vocabulary - how variable or consistent are the most critical terms, are they using fixed vocab, etc." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 161, + "id": "d5a77ac2-d058-426b-8d54-10aefd5023c3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'decreased', 'increased', nan}" + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# DoE fixed vocab according to readme\n", + "set(main_with_var['Direction of effect']) | set(main_with_var['Direction of effect_var_pheno']) | set(main_with_var['Direction of effect_var_fa'])" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "id": "faa11da3-07ad-4019-bfd7-589342c435aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'clearance of',\n", + " 'clinical benefit to',\n", + " 'concentrations of',\n", + " 'discontinuation of',\n", + " 'dose of',\n", + " 'dose-adjusted trough concentrations of',\n", + " 'exposure to',\n", + " 'half-life time of',\n", + " 'metabolism of',\n", + " nan,\n", + " 'resistance to',\n", + " 'response to',\n", + " 'steady-state concentration of',\n", + " 'time to response to',\n", + " 'trough concentration of'}" + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set(main_with_var['PD/PK terms']) # not limited according to readme" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "id": "61e8c91c-c963-4710-82f1-e6ec9f4c8def", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'activity of',\n", + " 'affinity to',\n", + " 'catalytic activity of',\n", + " 'clearance of',\n", + " 'concentrations of',\n", + " 'enzyme activity of',\n", + " 'expression of',\n", + " 'formation of',\n", + " 'glucuronidation of',\n", + " 'half-life of',\n", + " 'inhibition of',\n", + " 'metabolism of',\n", + " nan,\n", + " 'protein stability of',\n", + " 'sensitivity to',\n", + " 'steady-state level of',\n", + " 'sulfation of',\n", + " 'transcription of',\n", + " 'transport of',\n", + " 'uptake of'}" + ] + 
}, + "execution_count": 159, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set(main_with_var['Functional terms']) # not limited" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "id": "87c61724-3034-413f-b691-6ea8d475ae5d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'age at onset of', 'likelihood of', nan, 'risk of', 'severity of'}" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set(main_with_var['Side effect/efficacy/other']) # limited" + ] + }, + { + "cell_type": "markdown", + "id": "da232356-d984-4e35-bbe3-e5811b68d172", + "metadata": {}, + "source": [ + "Assume the final term (drug | gene/gene product | phenotype) will vary, but hopefully we can also map it to the relevant domain if needed (CHEMBL, EFO, Ensembl). Drugs & genes terms look about what you'd expect, as usual phenotype is the most diverse (see below; the readme explicitly states phenotype is not standardized).\n", + "\n", + "Otherwise this is a pretty small and consistent set of terms for ca. 15000 rows, though I don't think we can assume the vocab won't grow (except the actual direction word, we should be good there)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 228, + "id": "fb68e303-a627-418a-83d0-e1894a382e65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['PK:differences in exposure to the active metabolite of prasugrel',\n", + " 'Disease:Endometrial Neoplasms',\n", + " '\"Disease:Epidermal Necrolysis, Toxic\", \"Disease:Stevens-Johnson Syndrome\"',\n", + " 'Side Effect:total hemorrhage and major hemorrhage',\n", + " 'Other:subjective feelings of intoxication, stimulation, sedation, and happiness',\n", + " 'PK:plasma oxymorphone/oxycodone ratio',\n", + " 'Disease:Hematologic Diseases',\n", + " 'Side Effect:Venous Thrombosis',\n", + " 'Efficacy:non-remission',\n", + " 'Side Effect:Leukopenia']" + ] + }, + "execution_count": 228, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(set(main_with_var['Phenotype']))[1:11]" + ] + }, + { + "cell_type": "code", + "execution_count": 219, + "id": "39f1d19c-48cd-40c4-8e5b-a13e13364272", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1735" + ] + }, + "execution_count": 219, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(set(main_with_var['Phenotype']))" + ] + }, + { + "cell_type": "code", + "execution_count": 216, + "id": "6182da30-8a3e-4f28-8994-17aa028a2f55", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Disease', 'Efficacy', 'Other', 'PK', 'Side Effect'}" + ] + }, + "execution_count": 216, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Dirty attempt to get the prefix - doesn't account for multiples\n", + "set(main_with_var['Phenotype'].dropna().apply(lambda p: p.split(':')[0].strip('\"')))" + ] + }, + { + "cell_type": "code", + "execution_count": 218, + "id": "0965c47c-84f1-4b3b-b164-eab5011b3c94", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1499" + ] + }, + "execution_count": 218, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "len(set(main_with_var['Phenotype'].dropna().apply(lambda p: p.split(':')[1].strip('\"'))))" + ] + }, + { + "cell_type": "markdown", + "id": "49c5775d-3fd8-4ab7-9e41-3cb47923555e", + "metadata": {}, + "source": [ + "The prefix looks to be fixed and always present, which is nice and honestly kind of surprising actually. The rest I think can come from body of PGKB terms or be filled in freely. We could perhaps map them (with OLS or NLP).\n", + "\n", + "Same is true for population phenotypes when present:" + ] + }, + { + "cell_type": "code", + "execution_count": 226, + "id": "b3dbc780-9a76-4b2d-bf11-a090e9f66d7e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Disease', 'Efficacy', 'Other', 'PK', 'Side Effect'}" + ] + }, + "execution_count": 226, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# var_fa does not have the column so I can't use my nice function :(\n", + "(\n", + " set(main_with_var['Population Phenotypes or diseases'].dropna().apply(lambda p: p.split(':')[0].strip('\"'))) \n", + " | set(main_with_var['Population Phenotypes or diseases_var_pheno'].dropna().apply(lambda p: p.split(':')[0].strip('\"'))) \n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "59bcdfc1-98df-4f21-aa2e-487b2d4af3dc", + "metadata": {}, + "source": [ + "## Alleles and genotypes\n", + "\n", + "[Top of page](#Table-of-contents)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "423328ff-d104-474f-bc96-153df5945c19", + "metadata": {}, + "outputs": [], + "source": [ + "# Visual inspection of alleles - output suppressed for brevity\n", + "main_with_var_values_in('Alleles')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9279f41-dd40-4d15-a553-6ab08f8312b4", + "metadata": {}, + "outputs": [], + "source": [ + "main_with_var_values_in('Comparison Allele(s) or Genotype(s)')" + ] + }, + { + "cell_type": "markdown", + "id": 
"63924c24-053d-4fc2-806a-4e77fad198b4", + "metadata": {}, + "source": [ + "Alleles and comparison alleles look relatively consistent with what's in the alleles table:\n", + "* SNP genotype `C/T` or `CC` (annoying)\n", + "* SNP allele `C`\n", + "* indel `GGGGAGCTTTCCCAGAGACCC/del`\n", + "* named allele `*17` or `HTTLPR short form (S allele)`\n", + "* named genotype `*2/*4`\n", + "* combinations of the above delineated with `+`" + ] + }, + { + "cell_type": "code", + "execution_count": 203, + "id": "589d7070-76d0-4559-bafc-b422646134ca", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['G6PD Canton, Taiwan-Hakka, Gifu-like, Agrigento-like',\n", + " 'G6PD A- 202A_376G',\n", + " 'G6PD A- 202A_376G, G6PD B (reference)',\n", + " 'CYP1A2 high activity',\n", + " 'SLC6A4 HTTLPR long form (L allele), SLC6A4 HTTLPR short form (S allele)',\n", + " 'SLC6A4 HTTLPR short form (S allele)',\n", + " 'CYP2D6 poor metabolizer genotype',\n", + " 'CYP1A2 low activity',\n", + " 'CYP2A6 poor metabolizer genotype',\n", + " 'CYP2D6 ultrarapid metabolizer genotype',\n", + " 'CYP2D6 low activity',\n", + " 'CYP2A6 low activity',\n", + " 'G6PD B (reference), G6PD Mediterranean Haplotype',\n", + " 'CYP2C19 poor metabolizer phenotype',\n", + " 'CYP2C19 poor metabolizers',\n", + " 'CYP2C19 poor metabolizer genotype',\n", + " 'CYP2D6 poor metabolizer phenotype',\n", + " 'CYP3A4 low activity',\n", + " 'TPMT intermediate metabolizer phenotype',\n", + " 'G6PD deficiency',\n", + " 'NAT2 slow acetylator',\n", + " 'CYP2D6 ultrarapid metabolizer phenotype',\n", + " 'CYP2D6 poor and ultrarapid metabolizers',\n", + " 'CYP2D6 poor metabolizer and intermediate metabolizer genotypes',\n", + " 'CYP2D6 normal metabolizer and ultrarapid metabolizer genotypes',\n", + " 'CYP2C19 normal metabolizers',\n", + " 'CYP2C19 poor metabolizer and intermediate metabolizer genotypes',\n", + " 'SLC6A4 HTTLPR short form (S allele), SLC6A4 L allele-rs25531C, SLC6A4 L 
allele-rs25531T',\n", + " 'CYP2C9 poor metabolizer',\n", + " 'GSTT1 non-null, GSTT1 null',\n", + " 'CYP2A6 intermediate activity',\n", + " 'G6PD B (reference), G6PD Mediterranean, Dallas, Panama, Sassari, Cagliari, Birmingham',\n", + " 'G6PD A- 202A_376G, G6PD B (reference), G6PD Mediterranean, Dallas, Panama, Sassari, Cagliari, Birmingham',\n", + " 'GSTM1 non-null, GSTM1 null',\n", + " 'G6PD A- 202A_376G, G6PD B (reference), G6PD Mediterranean Haplotype, G6PD Mediterranean, Dallas, Panama, Sassari, Cagliari, Birmingham',\n", + " 'TPMT intermediate metabolizer genotype',\n", + " 'CYP2C19 intermediate metabolizer',\n", + " 'G6PD Mediterranean, Dallas, Panama, Sassari, Cagliari, Birmingham',\n", + " 'CYP2D6 normal metabolizer genotype',\n", + " 'G6PD B (reference), G6PD Mediterranean Haplotype, G6PD Mediterranean, Dallas, Panama, Sassari, Cagliari, Birmingham',\n", + " 'CYP2D6 poor metabolizer',\n", + " 'GSTT1 null',\n", + " 'G6PD B (reference), G6PD Canton, Taiwan-Hakka, Gifu-like, Agrigento-like',\n", + " 'TPMT poor metabolizer phenotype',\n", + " 'CYP2D6 poor metabolizers',\n", + " 'CYP2C19 normal metabolizer',\n", + " 'CYP2C19 intermediate metabolizers',\n", + " 'CYP2D6 normal metabolizers',\n", + " 'GSTM1 null',\n", + " 'NAT2 intermediate acetylator',\n", + " 'TPMT poor metabolizers',\n", + " 'CYP2D6 intermediate metabolizers',\n", + " 'GSTM1 non-null',\n", + " 'TPMT intermediate metabolizers',\n", + " 'SLC6A4 L allele-rs25531T',\n", + " 'CYP2C19 poor metabolizer',\n", + " 'CYP2D6 poor metabolizers and intermediate metabolizers']" + ] + }, + "execution_count": 203, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "variant_haps = main_with_var_values_in('Variant/Haplotypes')\n", + "\n", + "[v for v in variant_haps if pd.notna(v) and not (v.startswith('rs') or '*' in v)]" + ] + }, + { + "cell_type": "markdown", + "id": "0beac63b-2343-4410-a8ac-24896e140dd0", + "metadata": {}, + "source": [ + "Some of these are just named (non-star) 
alleles, but things like \"CYP2A6 poor metabolizer genotype\" are where the comparison metabolizer gets used as opposed to comparison alleles.\n", + "\n", + "Not sure what to do about these - we can't easily associate them with an allele or genotype, only with the clinical annotation as a whole.\n", + "\n", + "Here's [one example](https://www.pharmgkb.org/clinicalAnnotation/1139506787) - clinical annotation has many haplotype-level annotations, but the variant annotation is only given for \"poor metabolizer genotype\"." + ] + }, + { + "cell_type": "code", + "execution_count": 240, + "id": "2cb28399-d5e8-4e41-9d66-584c0dc20fd1", + "metadata": {}, + "outputs": [], + "source": [ + "def main_with_var_where_equals(common_col_name, value):\n", + " # Filter main_with_var on columns = value that are common to all three variant annotation tables\n", + " return main_with_var[\n", + " (main_with_var[common_col_name] == value)\n", + " | (main_with_var[f'{common_col_name}_var_pheno'] == value)\n", + " | (main_with_var[f'{common_col_name}_var_fa'] == value)\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 243, + "id": "dc941094-468a-409e-b099-895d9935bb52", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Clinical Annotation IDVariant/HaplotypesGeneLevel of EvidencePhenotype CategoryDrug(s)Phenotype(s)Evidence IDEvidence TypePMIDSummaryVariant Annotation ID_var_phenoVariant/Haplotypes_var_phenoGene_var_phenoDrug(s)_var_phenoPMID_var_phenoPhenotype Category_var_phenoSignificance_var_phenoNotes_var_phenoSentence_var_phenoAlleles_var_phenoSpecialty Population_var_phenoMetabolizer types_var_phenoisPlural_var_phenoIs/Is Not associated_var_phenoDirection of effect_var_phenoSide effect/efficacy/otherPhenotypeMultiple phenotypes And/orWhen treated with/exposed to/when assayed withMultiple drugs And/or_var_phenoPopulation types_var_phenoPopulation Phenotypes or diseases_var_phenoMultiple phenotypes or diseases And/or_var_phenoComparison Allele(s) or Genotype(s)_var_phenoComparison Metabolizer types_var_pheno
106221139506787CYP2A6*1, CYP2A6*1x2, CYP2A6*2, CYP2A6*4, CYP2A6*7, CYP2A6*9, CYP2A6*10, CYP2A6*11, CYP2A6*12, CYP2A6*13, CYP2A6*14, CYP2A6*15, CYP2A6*17, CYP2A6*19, CYP2A6*20, CYP2A6*23, CYP2A6*24, CYP2A6*25, CYP2A6*26, CYP2A6*27, CYP2A6*28, CYP2A6*35, CYP2A6*38, CYP2A6*39, CYP2A6*41, CYP2A6*46, CYP2A6*55CYP2A61BMetabolism/PKnicotineTobacco Use Disorder1183689160Variant Phenotype Annotation23371292CYP2A6 poor metabolizer is associated with increased ratio of cotinine formation to removal when exposed to nicotine in nonsmokers as compared to CYP2A6 normal metabolizer.1183689160CYP2A6 poor metabolizer genotypeCYP2A6nicotine23371292Metabolism/PKyesIn CYP2A6 reduced metabolizers, cotinine formation was altered less than was cotinine removal as compared to normal metabolizers. Ratios of cotinine formation to removal were 1.31 for reduced metabolizers and 1.12 for normal metabolizers . Reduced metabolizers were defined as subjects with one or two copies of *2,*4, *7,*9,*10,*12,*17,*35.CYP2A6 poor metabolizer is associated with increased ratio of cotinine formation to removal when exposed to nicotine in nonsmokers as compared to CYP2A6 normal metabolizer.NaNNaNpoor metabolizerIsAssociated withincreasedNaNPK:ratio of cotinine formation to removalNaNwhen exposed toNaNinOther:nonsmokersNaNNaNnormal metabolizer
106231139506787CYP2A6*1, CYP2A6*1x2, CYP2A6*2, CYP2A6*4, CYP2A6*7, CYP2A6*9, CYP2A6*10, CYP2A6*11, CYP2A6*12, CYP2A6*13, CYP2A6*14, CYP2A6*15, CYP2A6*17, CYP2A6*19, CYP2A6*20, CYP2A6*23, CYP2A6*24, CYP2A6*25, CYP2A6*26, CYP2A6*27, CYP2A6*28, CYP2A6*35, CYP2A6*38, CYP2A6*39, CYP2A6*41, CYP2A6*46, CYP2A6*55CYP2A61BMetabolism/PKnicotineTobacco Use Disorder1183689165Variant Phenotype Annotation23371292CYP2A6 poor metabolizer is associated with decreased ratio of plasma cotinine to urinary TNE when exposed to nicotine in smokers as compared to CYP2A6 normal metabolizer.1183689165CYP2A6 poor metabolizer genotypeCYP2A6nicotine23371292Metabolism/PKyesIn CYP2A6 reduced metabolizers, the slope between urinary TNE (a measurement of tobacco exposure) and plasma cotinine was significantly lower as compared to normal metabolizers. Reduced metabolizers were defined as subjects with one or two copies of *2,*4, *7,*9,*10,*12,*17,*35.CYP2A6 poor metabolizer is associated with decreased ratio of plasma cotinine to urinary TNE when exposed to nicotine in smokers as compared to CYP2A6 normal metabolizer.NaNNaNpoor metabolizerIsAssociated withdecreasedNaNPK:ratio of plasma cotinine to urinary TNENaNwhen exposed toNaNinOther:smokersNaNNaNnormal metabolizer
\n", + "
" + ], + "text/plain": [ + " Clinical Annotation ID \\\n", + "10622 1139506787 \n", + "10623 1139506787 \n", + "\n", + " Variant/Haplotypes \\\n", + "10622 CYP2A6*1, CYP2A6*1x2, CYP2A6*2, CYP2A6*4, CYP2A6*7, CYP2A6*9, CYP2A6*10, CYP2A6*11, CYP2A6*12, CYP2A6*13, CYP2A6*14, CYP2A6*15, CYP2A6*17, CYP2A6*19, CYP2A6*20, CYP2A6*23, CYP2A6*24, CYP2A6*25, CYP2A6*26, CYP2A6*27, CYP2A6*28, CYP2A6*35, CYP2A6*38, CYP2A6*39, CYP2A6*41, CYP2A6*46, CYP2A6*55 \n", + "10623 CYP2A6*1, CYP2A6*1x2, CYP2A6*2, CYP2A6*4, CYP2A6*7, CYP2A6*9, CYP2A6*10, CYP2A6*11, CYP2A6*12, CYP2A6*13, CYP2A6*14, CYP2A6*15, CYP2A6*17, CYP2A6*19, CYP2A6*20, CYP2A6*23, CYP2A6*24, CYP2A6*25, CYP2A6*26, CYP2A6*27, CYP2A6*28, CYP2A6*35, CYP2A6*38, CYP2A6*39, CYP2A6*41, CYP2A6*46, CYP2A6*55 \n", + "\n", + " Gene Level of Evidence Phenotype Category Drug(s) \\\n", + "10622 CYP2A6 1B Metabolism/PK nicotine \n", + "10623 CYP2A6 1B Metabolism/PK nicotine \n", + "\n", + " Phenotype(s) Evidence ID Evidence Type \\\n", + "10622 Tobacco Use Disorder 1183689160 Variant Phenotype Annotation \n", + "10623 Tobacco Use Disorder 1183689165 Variant Phenotype Annotation \n", + "\n", + " PMID \\\n", + "10622 23371292 \n", + "10623 23371292 \n", + "\n", + " Summary \\\n", + "10622 CYP2A6 poor metabolizer is associated with increased ratio of cotinine formation to removal when exposed to nicotine in nonsmokers as compared to CYP2A6 normal metabolizer. \n", + "10623 CYP2A6 poor metabolizer is associated with decreased ratio of plasma cotinine to urinary TNE when exposed to nicotine in smokers as compared to CYP2A6 normal metabolizer. 
\n", + "\n", + " Variant Annotation ID_var_pheno Variant/Haplotypes_var_pheno \\\n", + "10622 1183689160 CYP2A6 poor metabolizer genotype \n", + "10623 1183689165 CYP2A6 poor metabolizer genotype \n", + "\n", + " Gene_var_pheno Drug(s)_var_pheno PMID_var_pheno \\\n", + "10622 CYP2A6 nicotine 23371292 \n", + "10623 CYP2A6 nicotine 23371292 \n", + "\n", + " Phenotype Category_var_pheno Significance_var_pheno \\\n", + "10622 Metabolism/PK yes \n", + "10623 Metabolism/PK yes \n", + "\n", + " Notes_var_pheno \\\n", + "10622 In CYP2A6 reduced metabolizers, cotinine formation was altered less than was cotinine removal as compared to normal metabolizers. Ratios of cotinine formation to removal were 1.31 for reduced metabolizers and 1.12 for normal metabolizers . Reduced metabolizers were defined as subjects with one or two copies of *2,*4, *7,*9,*10,*12,*17,*35. \n", + "10623 In CYP2A6 reduced metabolizers, the slope between urinary TNE (a measurement of tobacco exposure) and plasma cotinine was significantly lower as compared to normal metabolizers. Reduced metabolizers were defined as subjects with one or two copies of *2,*4, *7,*9,*10,*12,*17,*35. \n", + "\n", + " Sentence_var_pheno \\\n", + "10622 CYP2A6 poor metabolizer is associated with increased ratio of cotinine formation to removal when exposed to nicotine in nonsmokers as compared to CYP2A6 normal metabolizer. \n", + "10623 CYP2A6 poor metabolizer is associated with decreased ratio of plasma cotinine to urinary TNE when exposed to nicotine in smokers as compared to CYP2A6 normal metabolizer. 
\n", + "\n", + " Alleles_var_pheno Specialty Population_var_pheno \\\n", + "10622 NaN NaN \n", + "10623 NaN NaN \n", + "\n", + " Metabolizer types_var_pheno isPlural_var_pheno \\\n", + "10622 poor metabolizer Is \n", + "10623 poor metabolizer Is \n", + "\n", + " Is/Is Not associated_var_pheno Direction of effect_var_pheno \\\n", + "10622 Associated with increased \n", + "10623 Associated with decreased \n", + "\n", + " Side effect/efficacy/other Phenotype \\\n", + "10622 NaN PK:ratio of cotinine formation to removal \n", + "10623 NaN PK:ratio of plasma cotinine to urinary TNE \n", + "\n", + " Multiple phenotypes And/or \\\n", + "10622 NaN \n", + "10623 NaN \n", + "\n", + " When treated with/exposed to/when assayed with \\\n", + "10622 when exposed to \n", + "10623 when exposed to \n", + "\n", + " Multiple drugs And/or_var_pheno Population types_var_pheno \\\n", + "10622 NaN in \n", + "10623 NaN in \n", + "\n", + " Population Phenotypes or diseases_var_pheno \\\n", + "10622 Other:nonsmokers \n", + "10623 Other:smokers \n", + "\n", + " Multiple phenotypes or diseases And/or_var_pheno \\\n", + "10622 NaN \n", + "10623 NaN \n", + "\n", + " Comparison Allele(s) or Genotype(s)_var_pheno \\\n", + "10622 NaN \n", + "10623 NaN \n", + "\n", + " Comparison Metabolizer types_var_pheno \n", + "10622 normal metabolizer \n", + "10623 normal metabolizer " + ] + }, + "execution_count": 243, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "main_with_var_where_equals('Variant/Haplotypes', 'CYP2A6 poor metabolizer genotype')[[\n", + " 'Clinical Annotation ID', 'Variant/Haplotypes', 'Gene',\n", + " 'Level of Evidence', 'Phenotype Category', 'Drug(s)', 'Phenotype(s)',\n", + " 'Evidence ID', 'Evidence Type', 'PMID', 'Summary',\n", + " 'Variant Annotation ID_var_pheno', 'Variant/Haplotypes_var_pheno',\n", + " 'Gene_var_pheno', 'Drug(s)_var_pheno', 'PMID_var_pheno',\n", + " 'Phenotype Category_var_pheno', 'Significance_var_pheno',\n", + " 'Notes_var_pheno', 
'Sentence_var_pheno', 'Alleles_var_pheno',\n", + " 'Specialty Population_var_pheno', 'Metabolizer types_var_pheno',\n", + " 'isPlural_var_pheno', 'Is/Is Not associated_var_pheno',\n", + " 'Direction of effect_var_pheno', 'Side effect/efficacy/other',\n", + " 'Phenotype', 'Multiple phenotypes And/or',\n", + " 'When treated with/exposed to/when assayed with',\n", + " 'Multiple drugs And/or_var_pheno', 'Population types_var_pheno',\n", + " 'Population Phenotypes or diseases_var_pheno',\n", + " 'Multiple phenotypes or diseases And/or_var_pheno',\n", + " 'Comparison Allele(s) or Genotype(s)_var_pheno',\n", + " 'Comparison Metabolizer types_var_pheno'\n", + "]]" + ] + }, + { + "cell_type": "markdown", + "id": "daf324bb-3f75-46db-be7c-1cd8ea561964", + "metadata": {}, + "source": [ + "## Bonus material\n", + "\n", + "[Top of page](#Table-of-contents)\n", + "\n", + "Things I thought about but haven't checked yet:\n", + "* How many so-called \"alleles\" are actually these metabolizer terms?\n", + " * might inform whether we need to associate via something other than allele, if many important annotations fall under this category\n", + "* Do we have to manage contradictory information for a single clinical annotation or even for a single allele/genotype?\n", + " * i.e. one study says genotype AA increases X, another says it decreases X, another says it decreases some other Y...\n", + " * maybe also check whether this occurs in level 1/2 evidence especially\n", + " * informs data structure - i.e. 
do fields need to be lists or strings\n", + "* How do multiple phenotypes, genes and drugs at the variant annotation level get aggregated at the clinical annotation level?\n", + " * [981755803](https://www.pharmgkb.org/clinicalAnnotation/981755803) indicates they do _not_ include all \"phenotype\" and \"population phenotype\" for all variant annotations at the clinical annotation level\n", + " * similarly interested in how they derive the clinical annotation sentence from all the variant annotation sentences, though it's arguably not important for our automated processing\n", + " \n", + "I also think there are at least 2 additional issues (not relating to direction of effect) that we can explore using these variant annotation tables, namely:\n", + "* Using the \"and/or\" column to clearly delineate drug combinations vs. drugs that are just being annotated together\n", + "* Using the additional phenotype annotations (side effect etc.) to disambiguate or supplement the phenotype information we use from the clinical annotation" + ] + }, + { + "cell_type": "markdown", + "id": "72471a5a-8c7f-4d4c-ae6f-92fa86c13d34", + "metadata": {}, + "source": [ + "## Post-meeting\n", + "\n", + "* Get a few representative (?!) 
examples of annotations\n", + "* Join with all variant evidence _and_ all clinical_alleles\n", + "* Dump to CSV" + ] + }, + { + "cell_type": "code", + "execution_count": 285, + "id": "442da098-f2a2-4436-89a0-e7a4760aad7d", + "metadata": {}, + "outputs": [], + "source": [ + "# Build a clean table showing everything\n", + "complete_df = pd.merge(clinical_annotations, clinical_ann_evidence, how='left', on=ID_COL_NAME)\n", + "complete_df = pd.merge(complete_df, clinical_ann_alleles, how='left', on=ID_COL_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": 286, + "id": "dc9116ed-cf4d-4c01-9a2d-f6189a77ee0e", + "metadata": {}, + "outputs": [], + "source": [ + "def get_annotation_tables_for_ids(ca_ids):\n", + "    \"\"\"Select the rows of `complete_df` for the given IDs and join them with each variant annotation table.\n", + "\n", + "    The IDs in `ca_ids` are converted to strings and matched against the `ID_COL_NAME` column.\n", + "    The selected rows are inner-joined ('Evidence ID' == 'Variant Annotation ID') with `var_drug_ann`,\n", + "    `var_pheno_ann` and `var_fa_ann`; overlapping columns from those tables get the suffixes\n", + "    '_var_drug', '_var_pheno' and '_var_fa' respectively.\n", + "\n", + "    Returns the tuple (df_drug, df_pheno, df_fa).\n", + "    \"\"\"\n", + "    wanted_ids = {str(ca_id) for ca_id in ca_ids}  # `ca_id`, not `id`, so the builtin is not shadowed\n", + "    df = complete_df[complete_df[ID_COL_NAME].isin(wanted_ids)]\n", + "    # One (table, suffix) pair per output, in the order the results are returned\n", + "    variant_tables = ((var_drug_ann, '_var_drug'), (var_pheno_ann, '_var_pheno'), (var_fa_ann, '_var_fa'))\n", + "    df_drug, df_pheno, df_fa = (\n", + "        pd.merge(df, var_ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='inner', suffixes=(None, suffix))\n", + "        for var_ann, suffix in variant_tables\n", + "    )\n", + "    return df_drug, df_pheno, df_fa" + ] + }, + { + "cell_type": "code", + "execution_count": 276, + "id": "86bab485-ccf8-426b-9565-01fe35058871", + "metadata": {}, + "outputs": [], + "source": [ + "example_ca_ids = [981755803, 1139506787, 1183888969, 1184514050, 981419266]\n", + "\n", + "d, p, f = get_annotation_tables_for_ids(example_ca_ids)\n", + "d.to_csv(f'{data_dir}/example_drug.csv', index=False)\n", + "p.to_csv(f'{data_dir}/example_pheno.csv', index=False)\n", + "f.to_csv(f'{data_dir}/example_func.csv', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4cee848-abf7-422d-9d64-c3c4f66f5242", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 
3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/requirements.txt b/requirements.txt index 552658c..4c30a0e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ jsonschema==3.2.0 numpy==1.24.3 pandas==1.5.3 pytest==7.2.2 -requests==2.31.0 +requests==2.32.0 retry==0.9.2 cmat @ git+https://github.com/EBIvariation/eva-opentargets.git#egg=cmat \ No newline at end of file