diff --git a/data-exploration/direction-of-effect.ipynb b/data-exploration/direction-of-effect.ipynb
new file mode 100644
index 0000000..70525d8
--- /dev/null
+++ b/data-exploration/direction-of-effect.ipynb
@@ -0,0 +1,4181 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "9d2ed9ab-c78b-4d67-8b9b-18d22e44444b",
+ "metadata": {},
+ "source": [
+ "# Variant annotation tables & direction of effect investigation\n",
+ "\n",
+ "## Table of contents\n",
+ "\n",
+ "1. [Initial data exploration](#Initial-data-exploration)\n",
+ " 1. [Example clinical annotation](#Example-clinical-annotation)\n",
+ "2. [Coverage](#Coverage)\n",
+ "3. [Direction of effect](#Direction-of-effect)\n",
+ " 1. [Sentence breakdown](#Sentence-breakdown)\n",
+ " 2. [Vocabulary](#Vocabulary)\n",
+ "4. [Alleles and genotypes](#Alleles-and-genotypes)\n",
+ "5. [Bonus material](#Bonus-material)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "id": "c6f9476d-828f-4ff7-82df-e2830b3e2767",
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import csv\n",
+ "import re\n",
+ "from collections import Counter\n",
+ "\n",
+ "import pandas as pd\n",
+ "\n",
+ "from opentargets_pharmgkb.evidence_generation import ID_COL_NAME\n",
+ "from opentargets_pharmgkb.pandas_utils import read_tsv_to_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "e6a46840-39c0-4518-8a05-9258568cda20",
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "pd.set_option('display.max_columns', None)\n",
+ "pd.set_option('display.max_rows', 100)\n",
+ "pd.set_option('display.max_colwidth', None)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bf50f6c5-0b82-41f7-932e-1b78c2dc22df",
+ "metadata": {},
+ "source": [
+ "## Initial data exploration\n",
+ "\n",
+ "[Top of page](#Table-of-contents)\n",
+ "\n",
+ "The variant annotations zip file contains 4 new tables, described in the readme as follows:\n",
+ ">* **var_pheno_ann.tsv**: Contains associations in which the variant affects a phenotype, with or without drug information.\n",
+ ">* **var_drug_ann.tsv**: Contains associations in which the variant affects a drug dose, response, metabolism, etc.\n",
+ ">* **var_fa_ann.tsv**: Contains in vitro and functional analysis-type associations.\n",
+ ">* **study_parameters.tsv**: Contains information about the study population size, biogeographical group and statistics for the variant annotations; this file is cross-referenced against the 3 variant annotation files.\n",
+ "\n",
+ "The study parameters table is interesting but feels out of scope, at least for now, so it is ignored for the rest of the notebook."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "27861f6f-9343-4c44-be4f-4bf70e39eee4",
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "data_dir = '/home/april/projects/opentargets/pharmgkb/doe'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "6c87bbc9-4c87-40a0-84d5-de9249f50192",
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Download new data (2024-05-05)\n",
+ "# %cd {data_dir}\n",
+ "\n",
+ "# !wget -q https://api.pharmgkb.org/v1/download/file/data/clinicalAnnotations.zip\n",
+ "# !wget -q https://api.pharmgkb.org/v1/download/file/data/variantAnnotations.zip\n",
+ "\n",
+ "# !unzip -jq clinicalAnnotations.zip \"*.tsv\" -d {data_dir}\n",
+ "# !unzip -jq variantAnnotations.zip \"*.tsv\" -d {data_dir}\n",
+ "\n",
+ "# !rm clinicalAnnotations.zip variantAnnotations.zip"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "4d823e4f-3e41-4bc6-a1d3-003c6bd14898",
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "var_drug_ann, var_fa_ann, var_pheno_ann = (  # one frame per variant annotation table, read in this order\n",
+ "    read_tsv_to_df(os.path.join(data_dir, tsv_name)) for tsv_name in ('var_drug_ann.tsv', 'var_fa_ann.tsv', 'var_pheno_ann.tsv')\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f7c7bbdb-9a0f-45ba-ac34-4070b8f0dbdb",
+ "metadata": {},
+ "source": [
+ "Questions to consider:\n",
+ "* How many annotations?\n",
+ "* What's the coverage of variant/haplotypes relative to clinical annotations?\n",
+ "* What are the relevant fields?\n",
+ "* What's the relationship between these annotations and clinical annotations?\n",
+ "* Which of these columns has a controlled vocab vs. free text?\n",
+ "* How do the different variant-level annotation sentences contribute to the overall clinical annotation sentences?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "0852d631-f8ff-4fa8-afe0-6b2599b0b9c7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "11901"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(var_drug_ann)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "64ac3c98-d1cf-48c1-8805-ecdc4530d649",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2009"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(var_fa_ann)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "b826596b-12e4-4095-b9f5-f07796f0e7d7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13517"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(var_pheno_ann)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 245,
+ "id": "d22115ea-56f6-404d-9839-c91c8ed74a89",
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ },
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Variant Annotation ID | \n",
+ " Variant/Haplotypes | \n",
+ " Gene | \n",
+ " Drug(s) | \n",
+ " PMID | \n",
+ " Phenotype Category | \n",
+ " Significance | \n",
+ " Notes | \n",
+ " Sentence | \n",
+ " Alleles | \n",
+ " Specialty Population | \n",
+ " Metabolizer types | \n",
+ " isPlural | \n",
+ " Is/Is Not associated | \n",
+ " Direction of effect | \n",
+ " PD/PK terms | \n",
+ " Multiple drugs And/or | \n",
+ " Population types | \n",
+ " Population Phenotypes or diseases | \n",
+ " Multiple phenotypes or diseases And/or | \n",
+ " Comparison Allele(s) or Genotype(s) | \n",
+ " Comparison Metabolizer types | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1451834452 | \n",
+ " CYP3A4*1, CYP3A4*17 | \n",
+ " CYP3A4 | \n",
+ " nifedipine | \n",
+ " 15634941 | \n",
+ " Other, Metabolism/PK | \n",
+ " not stated | \n",
+ " in vitro expression of the recombinant CYP3A4*17 allelic protein and the wild-type protein | \n",
+ " CYP3A4 *17 is associated with decreased metabolism of nifedipine as compared to CYP3A4 *1. | \n",
+ " *17 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " decreased | \n",
+ " metabolism of | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " *1 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1451159680 | \n",
+ " rs5031016 | \n",
+ " CYP2A6 | \n",
+ " warfarin | \n",
+ " 22248286 | \n",
+ " Dosage | \n",
+ " no | \n",
+ " No association was found between this variant and warfarin-maintenance dose. Described as CYP2A6*7 in this study. | \n",
+ " Allele G is not associated with increased dose of warfarin in people with an international normalized ratio (INR) of 2.0-3.0 as compared to allele A. | \n",
+ " G | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Not associated with | \n",
+ " increased | \n",
+ " dose of | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Other:an international normalized ratio (INR) of 2.0-3.0 | \n",
+ " NaN | \n",
+ " A | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1451306860 | \n",
+ " CYP2C9*11 | \n",
+ " CYP2C9 | \n",
+ " warfarin | \n",
+ " 33350885 | \n",
+ " Dosage | \n",
+ " not stated | \n",
+ " \"This case suggests that CYP2C9 *11/*11 carriers require approximately two thirds less warfarin than CYP2C9\" normal function homozygotes. | \n",
+ " CYP2C9 *11/*11 is associated with decreased dose of warfarin. | \n",
+ " *11/*11 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " decreased | \n",
+ " dose of | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1448997750 | \n",
+ " CYP2B6*1, CYP2B6*18 | \n",
+ " CYP2B6 | \n",
+ " efavirenz | \n",
+ " 16495778 | \n",
+ " Metabolism/PK | \n",
+ " yes | \n",
+ " Please note that in the paper the allele was referred to as CYP2B6*16. CYP2B6*16 and *18 alleles have been consolidated by PharmVar in Jan 2020, with *16 now listed as a suballele of *18 (CYP2B6*18.002). This annotation is updated to be on CYP2B6*18, instead of CYP2B6*16. | \n",
+ " CYP2B6 *1/*18 is associated with increased concentrations of efavirenz in people with HIV Infections as compared to CYP2B6 *1/*1. | \n",
+ " *1/*18 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " concentrations of | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:HIV Infections | \n",
+ " NaN | \n",
+ " *1/*1 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1448631821 | \n",
+ " CYP2C19*1, CYP2C19*2 | \n",
+ " CYP2C19 | \n",
+ " clomipramine, desmethyl clomipramine | \n",
+ " 28470111 | \n",
+ " Metabolism/PK | \n",
+ " no | \n",
+ " in a single individual | \n",
+ " CYP2C19 *1/*2 is associated with increased trough concentration of clomipramine and desmethyl clomipramine. | \n",
+ " *1/*2 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " trough concentration of | \n",
+ " and | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Variant Annotation ID Variant/Haplotypes Gene \\\n",
+ "0 1451834452 CYP3A4*1, CYP3A4*17 CYP3A4 \n",
+ "1 1451159680 rs5031016 CYP2A6 \n",
+ "2 1451306860 CYP2C9*11 CYP2C9 \n",
+ "3 1448997750 CYP2B6*1, CYP2B6*18 CYP2B6 \n",
+ "4 1448631821 CYP2C19*1, CYP2C19*2 CYP2C19 \n",
+ "\n",
+ " Drug(s) PMID Phenotype Category \\\n",
+ "0 nifedipine 15634941 Other, Metabolism/PK \n",
+ "1 warfarin 22248286 Dosage \n",
+ "2 warfarin 33350885 Dosage \n",
+ "3 efavirenz 16495778 Metabolism/PK \n",
+ "4 clomipramine, desmethyl clomipramine 28470111 Metabolism/PK \n",
+ "\n",
+ " Significance \\\n",
+ "0 not stated \n",
+ "1 no \n",
+ "2 not stated \n",
+ "3 yes \n",
+ "4 no \n",
+ "\n",
+ " Notes \\\n",
+ "0 in vitro expression of the recombinant CYP3A4*17 allelic protein and the wild-type protein \n",
+ "1 No association was found between this variant and warfarin-maintenance dose. Described as CYP2A6*7 in this study. \n",
+ "2 \"This case suggests that CYP2C9 *11/*11 carriers require approximately two thirds less warfarin than CYP2C9\" normal function homozygotes. \n",
+ "3 Please note that in the paper the allele was referred to as CYP2B6*16. CYP2B6*16 and *18 alleles have been consolidated by PharmVar in Jan 2020, with *16 now listed as a suballele of *18 (CYP2B6*18.002). This annotation is updated to be on CYP2B6*18, instead of CYP2B6*16. \n",
+ "4 in a single individual \n",
+ "\n",
+ " Sentence \\\n",
+ "0 CYP3A4 *17 is associated with decreased metabolism of nifedipine as compared to CYP3A4 *1. \n",
+ "1 Allele G is not associated with increased dose of warfarin in people with an international normalized ratio (INR) of 2.0-3.0 as compared to allele A. \n",
+ "2 CYP2C9 *11/*11 is associated with decreased dose of warfarin. \n",
+ "3 CYP2B6 *1/*18 is associated with increased concentrations of efavirenz in people with HIV Infections as compared to CYP2B6 *1/*1. \n",
+ "4 CYP2C19 *1/*2 is associated with increased trough concentration of clomipramine and desmethyl clomipramine. \n",
+ "\n",
+ " Alleles Specialty Population Metabolizer types isPlural \\\n",
+ "0 *17 NaN NaN Is \n",
+ "1 G NaN NaN Is \n",
+ "2 *11/*11 NaN NaN Is \n",
+ "3 *1/*18 NaN NaN Is \n",
+ "4 *1/*2 NaN NaN Is \n",
+ "\n",
+ " Is/Is Not associated Direction of effect PD/PK terms \\\n",
+ "0 Associated with decreased metabolism of \n",
+ "1 Not associated with increased dose of \n",
+ "2 Associated with decreased dose of \n",
+ "3 Associated with increased concentrations of \n",
+ "4 Associated with increased trough concentration of \n",
+ "\n",
+ " Multiple drugs And/or Population types \\\n",
+ "0 NaN NaN \n",
+ "1 NaN in people with \n",
+ "2 NaN NaN \n",
+ "3 NaN in people with \n",
+ "4 and NaN \n",
+ "\n",
+ " Population Phenotypes or diseases \\\n",
+ "0 NaN \n",
+ "1 Other:an international normalized ratio (INR) of 2.0-3.0 \n",
+ "2 NaN \n",
+ "3 Disease:HIV Infections \n",
+ "4 NaN \n",
+ "\n",
+ " Multiple phenotypes or diseases And/or Comparison Allele(s) or Genotype(s) \\\n",
+ "0 NaN *1 \n",
+ "1 NaN A \n",
+ "2 NaN NaN \n",
+ "3 NaN *1/*1 \n",
+ "4 NaN NaN \n",
+ "\n",
+ " Comparison Metabolizer types \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN "
+ ]
+ },
+ "execution_count": 245,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Looking at the data - first 5 rows only, for brevity\n",
+ "var_drug_ann.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 246,
+ "id": "12dada29-2a14-4992-acf4-45115aed57d2",
+ "metadata": {
+ "pycharm": {
+ "name": "#%%\n"
+ },
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Variant Annotation ID | \n",
+ " Variant/Haplotypes | \n",
+ " Gene | \n",
+ " Drug(s) | \n",
+ " PMID | \n",
+ " Phenotype Category | \n",
+ " Significance | \n",
+ " Notes | \n",
+ " Sentence | \n",
+ " Alleles | \n",
+ " Specialty Population | \n",
+ " Assay type | \n",
+ " Metabolizer types | \n",
+ " isPlural | \n",
+ " Is/Is Not associated | \n",
+ " Direction of effect | \n",
+ " Functional terms | \n",
+ " Gene/gene product | \n",
+ " When treated with/exposed to/when assayed with | \n",
+ " Multiple drugs And/or | \n",
+ " Cell type | \n",
+ " Comparison Allele(s) or Genotype(s) | \n",
+ " Comparison Metabolizer types | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1451148445 | \n",
+ " CYP2C19*1, CYP2C19*17 | \n",
+ " CYP2C19 | \n",
+ " normeperidine | \n",
+ " 30902024 | \n",
+ " NaN | \n",
+ " not stated | \n",
+ " In other in vitro experiments, normeperidine formation was significantly correlated with CYP2C19 activity, as measured by S-mephenytoin 4-hydroxylation. | \n",
+ " CYP2C19 *17/*17 is associated with increased formation of normeperidine as compared to CYP2C19 *1/*1 + *1/*17. | \n",
+ " *17/*17 | \n",
+ " NaN | \n",
+ " in human liver microsomes | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " formation of | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " *1/*1 + *1/*17 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1447814273 | \n",
+ " rs9923231 | \n",
+ " VKORC1 | \n",
+ " NaN | \n",
+ " 26847243 | \n",
+ " Other | \n",
+ " no | \n",
+ " NaN | \n",
+ " Allele T is not associated with transcription of VKORC1 in HepG2 cells as compared to allele C. | \n",
+ " T | \n",
+ " NaN | \n",
+ " luciferase assay | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Not associated with | \n",
+ " NaN | \n",
+ " transcription of | \n",
+ " VKORC1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " in HepG2 cells | \n",
+ " C | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 1447814277 | \n",
+ " rs56314408 | \n",
+ " VKORC1 | \n",
+ " NaN | \n",
+ " 26847243 | \n",
+ " Other | \n",
+ " yes | \n",
+ " In the European population, this SNPs is in high LD with rs9923231 but not other populations. This SNP disrupts a binding motif for transcription factor TFAP2A/C. | \n",
+ " Allele C is associated with increased transcription of VKORC1 in HepG2 cells as compared to allele T. | \n",
+ " C | \n",
+ " NaN | \n",
+ " luciferase assay | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " transcription of | \n",
+ " VKORC1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " in HepG2 cells | \n",
+ " T | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1447990384 | \n",
+ " rs1065852 | \n",
+ " CYP2D6 | \n",
+ " bufuralol | \n",
+ " 2211621 | \n",
+ " Metabolism/PK | \n",
+ " not stated | \n",
+ " In vitro experiments showed a significant decrease in CYP2D6 activity for the variant construct expressed in COS-1 cells as compared to wild-type. | \n",
+ " Allele A is associated with decreased activity of CYP2D6 when assayed with bufuralol in COS-1 cells as compared to allele G. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " decreased | \n",
+ " activity of | \n",
+ " CYP2D6 | \n",
+ " when assayed with | \n",
+ " NaN | \n",
+ " in COS-1 cells | \n",
+ " G | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1448281185 | \n",
+ " CYP2B6*1, CYP2B6*6 | \n",
+ " CYP2B6 | \n",
+ " bupropion | \n",
+ " 27439448 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " The ratio of hydroxybupropion versus bupropion (AUC_hyd/ AUC_bup) in terms of area under the time-concentration curve (AUC) was used to assay CYP2B6 activity. | \n",
+ " CYP2B6 *1/*1 is associated with increased activity of CYP2B6 when assayed with bupropion as compared to CYP2B6 *1/*6. | \n",
+ " *1/*1 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " activity of | \n",
+ " CYP2B6 | \n",
+ " when assayed with | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " *1/*6 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Variant Annotation ID Variant/Haplotypes Gene Drug(s) \\\n",
+ "0 1451148445 CYP2C19*1, CYP2C19*17 CYP2C19 normeperidine \n",
+ "1 1447814273 rs9923231 VKORC1 NaN \n",
+ "2 1447814277 rs56314408 VKORC1 NaN \n",
+ "3 1447990384 rs1065852 CYP2D6 bufuralol \n",
+ "4 1448281185 CYP2B6*1, CYP2B6*6 CYP2B6 bupropion \n",
+ "\n",
+ " PMID Phenotype Category Significance \\\n",
+ "0 30902024 NaN not stated \n",
+ "1 26847243 Other no \n",
+ "2 26847243 Other yes \n",
+ "3 2211621 Metabolism/PK not stated \n",
+ "4 27439448 Efficacy yes \n",
+ "\n",
+ " Notes \\\n",
+ "0 In other in vitro experiments, normeperidine formation was significantly correlated with CYP2C19 activity, as measured by S-mephenytoin 4-hydroxylation. \n",
+ "1 NaN \n",
+ "2 In the European population, this SNPs is in high LD with rs9923231 but not other populations. This SNP disrupts a binding motif for transcription factor TFAP2A/C. \n",
+ "3 In vitro experiments showed a significant decrease in CYP2D6 activity for the variant construct expressed in COS-1 cells as compared to wild-type. \n",
+ "4 The ratio of hydroxybupropion versus bupropion (AUC_hyd/ AUC_bup) in terms of area under the time-concentration curve (AUC) was used to assay CYP2B6 activity. \n",
+ "\n",
+ " Sentence \\\n",
+ "0 CYP2C19 *17/*17 is associated with increased formation of normeperidine as compared to CYP2C19 *1/*1 + *1/*17. \n",
+ "1 Allele T is not associated with transcription of VKORC1 in HepG2 cells as compared to allele C. \n",
+ "2 Allele C is associated with increased transcription of VKORC1 in HepG2 cells as compared to allele T. \n",
+ "3 Allele A is associated with decreased activity of CYP2D6 when assayed with bufuralol in COS-1 cells as compared to allele G. \n",
+ "4 CYP2B6 *1/*1 is associated with increased activity of CYP2B6 when assayed with bupropion as compared to CYP2B6 *1/*6. \n",
+ "\n",
+ " Alleles Specialty Population Assay type Metabolizer types \\\n",
+ "0 *17/*17 NaN in human liver microsomes NaN \n",
+ "1 T NaN luciferase assay NaN \n",
+ "2 C NaN luciferase assay NaN \n",
+ "3 A NaN NaN NaN \n",
+ "4 *1/*1 NaN NaN NaN \n",
+ "\n",
+ " isPlural Is/Is Not associated Direction of effect Functional terms \\\n",
+ "0 Is Associated with increased formation of \n",
+ "1 Is Not associated with NaN transcription of \n",
+ "2 Is Associated with increased transcription of \n",
+ "3 Is Associated with decreased activity of \n",
+ "4 Is Associated with increased activity of \n",
+ "\n",
+ " Gene/gene product When treated with/exposed to/when assayed with \\\n",
+ "0 NaN NaN \n",
+ "1 VKORC1 NaN \n",
+ "2 VKORC1 NaN \n",
+ "3 CYP2D6 when assayed with \n",
+ "4 CYP2B6 when assayed with \n",
+ "\n",
+ " Multiple drugs And/or Cell type Comparison Allele(s) or Genotype(s) \\\n",
+ "0 NaN NaN *1/*1 + *1/*17 \n",
+ "1 NaN in HepG2 cells C \n",
+ "2 NaN in HepG2 cells T \n",
+ "3 NaN in COS-1 cells G \n",
+ "4 NaN NaN *1/*6 \n",
+ "\n",
+ " Comparison Metabolizer types \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN "
+ ]
+ },
+ "execution_count": 246,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "var_fa_ann.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 247,
+ "id": "70537798-e6fb-47f5-8ba2-6859c4b645a2",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Variant Annotation ID | \n",
+ " Variant/Haplotypes | \n",
+ " Gene | \n",
+ " Drug(s) | \n",
+ " PMID | \n",
+ " Phenotype Category | \n",
+ " Significance | \n",
+ " Notes | \n",
+ " Sentence | \n",
+ " Alleles | \n",
+ " Specialty Population | \n",
+ " Metabolizer types | \n",
+ " isPlural | \n",
+ " Is/Is Not associated | \n",
+ " Direction of effect | \n",
+ " Side effect/efficacy/other | \n",
+ " Phenotype | \n",
+ " Multiple phenotypes And/or | \n",
+ " When treated with/exposed to/when assayed with | \n",
+ " Multiple drugs And/or | \n",
+ " Population types | \n",
+ " Population Phenotypes or diseases | \n",
+ " Multiple phenotypes or diseases And/or | \n",
+ " Comparison Allele(s) or Genotype(s) | \n",
+ " Comparison Metabolizer types | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1449169911 | \n",
+ " HLA-B*35:08 | \n",
+ " HLA-B | \n",
+ " lamotrigine | \n",
+ " 29238301 | \n",
+ " Toxicity | \n",
+ " no | \n",
+ " The allele was not significant when comparing allele frequency in cases of severe cutaneous adverse reactions (SCAR), Stevens-Johnson Syndrome (SJS) and Maculopapular Exanthema (MPE) (1/15) and controls (individuals without AEs who took lamotrigine) (0/50). The allele was significant when comparing between cases (1/15) and the general population (1/986). | \n",
+ " HLA-B *35:08 is not associated with likelihood of Maculopapular Exanthema, severe cutaneous adverse reactions or Stevens-Johnson Syndrome when treated with lamotrigine in people with Epilepsy. | \n",
+ " *35:08 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Not associated with | \n",
+ " NaN | \n",
+ " likelihood of | \n",
+ " Side Effect:Maculopapular Exanthema, Side Effect:severe cutaneous adverse reactions, Side Effect:Stevens-Johnson Syndrome | \n",
+ " or | \n",
+ " when treated with | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Epilepsy | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 982022165 | \n",
+ " rs45607939 | \n",
+ " NAT2 | \n",
+ " sulfamethoxazole / trimethoprim | \n",
+ " 22850190 | \n",
+ " Toxicity | \n",
+ " no | \n",
+ " Minor allele frequencies were compared between cases (with drug-induced hypersensitivity) and controls. | \n",
+ " Allele T is not associated with increased risk of Hypersensitivity when treated with sulfamethoxazole / trimethoprim in people with Infection. | \n",
+ " T | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Not associated with | \n",
+ " increased | \n",
+ " risk of | \n",
+ " Disease:Hypersensitivity | \n",
+ " NaN | \n",
+ " when treated with | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Infection | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 982022148 | \n",
+ " rs1799930 | \n",
+ " NAT2 | \n",
+ " sulfamethoxazole / trimethoprim | \n",
+ " 22850190 | \n",
+ " Toxicity | \n",
+ " no | \n",
+ " Minor allele frequencies were compared between cases (with drug-induced hypersensitivity) and controls. | \n",
+ " Allele A is not associated with increased risk of Hypersensitivity when treated with sulfamethoxazole / trimethoprim in people with Infection. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Not associated with | \n",
+ " increased | \n",
+ " risk of | \n",
+ " Disease:Hypersensitivity | \n",
+ " NaN | \n",
+ " when treated with | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Infection | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 1451283480 | \n",
+ " rs16969968 | \n",
+ " CHRNA5 | \n",
+ " NaN | \n",
+ " 22071378 | \n",
+ " Other | \n",
+ " yes | \n",
+ " this was from meta-analysis of 27 studies but the number of total cases and the risk allele not clearly specified. Minor allele frequency was given for A allele. Introduction states that variant is Asp398Asn, where Asn (A allele) has lower nicotine response than Asp (G allele) and may be at greater risk for nicotine addiction. | \n",
+ " Allele A is associated with increased severity of Tobacco Use Disorder in people with Tobacco Use Disorder. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " severity of | \n",
+ " Other:Tobacco Use Disorder | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Other:Tobacco Use Disorder | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1444696916 | \n",
+ " rs267606617 | \n",
+ " MT-RNR1 | \n",
+ " streptomycin | \n",
+ " 7689389 | \n",
+ " Toxicity | \n",
+ " not stated | \n",
+ " Pedigree analysis with 3 separate families. Within the maternal lines, 15 individuals had the 1555G variant, took aminoglycoside antibiotics, and developed hearing loss. 100% of individuals with the 1555G variant who took aminoglycosides developed hearing loss. Homoplasmic. Please note that no statistical analyses were done. | \n",
+ " Allele G is associated with Ototoxicity when treated with streptomycin as compared to allele A. | \n",
+ " G | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Side Effect:Ototoxicity | \n",
+ " and | \n",
+ " when treated with | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " A | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Variant Annotation ID Variant/Haplotypes Gene \\\n",
+ "0 1449169911 HLA-B*35:08 HLA-B \n",
+ "1 982022165 rs45607939 NAT2 \n",
+ "2 982022148 rs1799930 NAT2 \n",
+ "3 1451283480 rs16969968 CHRNA5 \n",
+ "4 1444696916 rs267606617 MT-RNR1 \n",
+ "\n",
+ " Drug(s) PMID Phenotype Category Significance \\\n",
+ "0 lamotrigine 29238301 Toxicity no \n",
+ "1 sulfamethoxazole / trimethoprim 22850190 Toxicity no \n",
+ "2 sulfamethoxazole / trimethoprim 22850190 Toxicity no \n",
+ "3 NaN 22071378 Other yes \n",
+ "4 streptomycin 7689389 Toxicity not stated \n",
+ "\n",
+ " Notes \\\n",
+ "0 The allele was not significant when comparing allele frequency in cases of severe cutaneous adverse reactions (SCAR), Stevens-Johnson Syndrome (SJS) and Maculopapular Exanthema (MPE) (1/15) and controls (individuals without AEs who took lamotrigine) (0/50). The allele was significant when comparing between cases (1/15) and the general population (1/986). \n",
+ "1 Minor allele frequencies were compared between cases (with drug-induced hypersensitivity) and controls. \n",
+ "2 Minor allele frequencies were compared between cases (with drug-induced hypersensitivity) and controls. \n",
+ "3 this was from meta-analysis of 27 studies but the number of total cases and the risk allele not clearly specified. Minor allele frequency was given for A allele. Introduction states that variant is Asp398Asn, where Asn (A allele) has lower nicotine response than Asp (G allele) and may be at greater risk for nicotine addiction. \n",
+ "4 Pedigree analysis with 3 separate families. Within the maternal lines, 15 individuals had the 1555G variant, took aminoglycoside antibiotics, and developed hearing loss. 100% of individuals with the 1555G variant who took aminoglycosides developed hearing loss. Homoplasmic. Please note that no statistical analyses were done. \n",
+ "\n",
+ " Sentence \\\n",
+ "0 HLA-B *35:08 is not associated with likelihood of Maculopapular Exanthema, severe cutaneous adverse reactions or Stevens-Johnson Syndrome when treated with lamotrigine in people with Epilepsy. \n",
+ "1 Allele T is not associated with increased risk of Hypersensitivity when treated with sulfamethoxazole / trimethoprim in people with Infection. \n",
+ "2 Allele A is not associated with increased risk of Hypersensitivity when treated with sulfamethoxazole / trimethoprim in people with Infection. \n",
+ "3 Allele A is associated with increased severity of Tobacco Use Disorder in people with Tobacco Use Disorder. \n",
+ "4 Allele G is associated with Ototoxicity when treated with streptomycin as compared to allele A. \n",
+ "\n",
+ " Alleles Specialty Population Metabolizer types isPlural \\\n",
+ "0 *35:08 NaN NaN Is \n",
+ "1 T NaN NaN Is \n",
+ "2 A NaN NaN Is \n",
+ "3 A NaN NaN Is \n",
+ "4 G NaN NaN Is \n",
+ "\n",
+ " Is/Is Not associated Direction of effect Side effect/efficacy/other \\\n",
+ "0 Not associated with NaN likelihood of \n",
+ "1 Not associated with increased risk of \n",
+ "2 Not associated with increased risk of \n",
+ "3 Associated with increased severity of \n",
+ "4 Associated with NaN NaN \n",
+ "\n",
+ " Phenotype \\\n",
+ "0 Side Effect:Maculopapular Exanthema, Side Effect:severe cutaneous adverse reactions, Side Effect:Stevens-Johnson Syndrome \n",
+ "1 Disease:Hypersensitivity \n",
+ "2 Disease:Hypersensitivity \n",
+ "3 Other:Tobacco Use Disorder \n",
+ "4 Side Effect:Ototoxicity \n",
+ "\n",
+ " Multiple phenotypes And/or When treated with/exposed to/when assayed with \\\n",
+ "0 or when treated with \n",
+ "1 NaN when treated with \n",
+ "2 NaN when treated with \n",
+ "3 NaN NaN \n",
+ "4 and when treated with \n",
+ "\n",
+ " Multiple drugs And/or Population types Population Phenotypes or diseases \\\n",
+ "0 NaN in people with Disease:Epilepsy \n",
+ "1 NaN in people with Disease:Infection \n",
+ "2 NaN in people with Disease:Infection \n",
+ "3 NaN in people with Other:Tobacco Use Disorder \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ " Multiple phenotypes or diseases And/or Comparison Allele(s) or Genotype(s) \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN A \n",
+ "\n",
+ " Comparison Metabolizer types \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN "
+ ]
+ },
+ "execution_count": 247,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "var_pheno_ann.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3b84a02e-1791-4a6b-8223-d904442ddf7c",
+ "metadata": {},
+ "source": [
+ "The 3 annotation tables provide evidence for the clinical annotations and can be connected to them by joining with the `clinical_ann_evidence.tsv` file. In general a clinical annotation can have multiple variant annotations as evidence, and a variant annotation can be used as evidence for multiple clinical annotations (in theory; I've not actually observed this).\n",
+ "\n",
+ "Each of these tables has a \"Direction of effect\" column, and the type of \"effect\" is different for each - likelihood of side effects, formation of product, metabolism of drug, etc.\n",
+ "\n",
+ "**Question for OT**: when we say \"direction of effect\", do we mean any of these \"effects\"? I.e. should we include all three of these tables or focus on one?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "d43fd147-43b4-4541-9595-f757da937e60",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Read the three clinical annotation tables from data_dir, in the order the names are unpacked\n",
+ "clinical_annotations, clinical_ann_evidence, clinical_ann_alleles = (\n",
+ "    read_tsv_to_df(os.path.join(data_dir, tsv_name))\n",
+ "    for tsv_name in ('clinical_annotations.tsv', 'clinical_ann_evidence.tsv', 'clinical_ann_alleles.tsv')\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "id": "1a553aac-a9f7-4579-ab7c-05fa6f90ddfe",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Attach evidence rows to their clinical annotation; the left join keeps annotations that have no evidence.\n",
+ "main_df = pd.merge(clinical_annotations, clinical_ann_evidence, how='left', on=ID_COL_NAME)\n",
+ "main_df = main_df[[\n",
+ "    # Main table (ID_COL_NAME is the join key above, so select it by the same constant)\n",
+ "    ID_COL_NAME, 'Variant/Haplotypes', 'Gene', 'Level of Evidence', 'Phenotype Category', 'Drug(s)', 'Phenotype(s)',\n",
+ "    # Evidence table\n",
+ "    'Evidence ID', 'Evidence Type', 'PMID', 'Summary',\n",
+ "]]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5945f09b-9516-4d65-ad75-d27a5a1890cf",
+ "metadata": {},
+ "source": [
+ "#### Example clinical annotation\n",
+ "\n",
+ "[Top of page](#Table-of-contents)\n",
+ "\n",
+ "This section looks at clinical annotation [981755803](https://www.pharmgkb.org/clinicalAnnotation/981755803), which has all three types of variant annotation evidence as well as label/guideline evidence."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 124,
+ "id": "f5bb8c9e-baeb-4ef6-9373-7c38a333601c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def merge_evidence_with_variant_annotations(evidence_df, variant_ann_df, suffix):\n",
+ "    \"\"\"Inner-join evidence rows with a variant annotation table ('Evidence ID' = 'Variant Annotation ID').\n",
+ "\n",
+ "    Overlapping column names coming from the variant annotation table get `suffix` appended.\n",
+ "    \"\"\"\n",
+ "    return pd.merge(\n",
+ "        evidence_df, variant_ann_df,\n",
+ "        left_on='Evidence ID', right_on='Variant Annotation ID',\n",
+ "        how='inner', suffixes=(None, suffix),\n",
+ "    )\n",
+ "\n",
+ "\n",
+ "# All evidence rows for the example clinical annotation\n",
+ "is_example_annotation = main_df[ID_COL_NAME] == '981755803'\n",
+ "df_981755803 = main_df[is_example_annotation]\n",
+ "\n",
+ "df_981755803_drug = merge_evidence_with_variant_annotations(df_981755803, var_drug_ann, '_var_drug')\n",
+ "df_981755803_pheno = merge_evidence_with_variant_annotations(df_981755803, var_pheno_ann, '_var_pheno')\n",
+ "df_981755803_fa = merge_evidence_with_variant_annotations(df_981755803, var_fa_ann, '_var_fa')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 229,
+ "id": "811adf62-6f79-4451-9a3e-2772057e4a01",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Number of evidence 30\n",
+ "Number of var/drug evidence 24\n",
+ "Number of var/fa evidence 2\n",
+ "Number of var/pheno evidence 2\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Row counts: all evidence for the annotation, then each variant annotation join\n",
+ "evidence_frames_by_label = {\n",
+ "    'Number of evidence': df_981755803,\n",
+ "    'Number of var/drug evidence': df_981755803_drug,\n",
+ "    'Number of var/fa evidence': df_981755803_fa,\n",
+ "    'Number of var/pheno evidence': df_981755803_pheno,\n",
+ "}\n",
+ "for label, evidence_frame in evidence_frames_by_label.items():\n",
+ "    print(label, len(evidence_frame))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 248,
+ "id": "c4a15bc6-90fc-4d4b-bd0e-621b8ffd6093",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Clinical Annotation ID | \n",
+ " Variant/Haplotypes | \n",
+ " Gene | \n",
+ " Level of Evidence | \n",
+ " Phenotype Category | \n",
+ " Drug(s) | \n",
+ " Phenotype(s) | \n",
+ " Evidence ID | \n",
+ " Evidence Type | \n",
+ " PMID | \n",
+ " Summary | \n",
+ " Variant Annotation ID | \n",
+ " Variant/Haplotypes_var_drug | \n",
+ " Gene_var_drug | \n",
+ " Drug(s)_var_drug | \n",
+ " PMID_var_drug | \n",
+ " Phenotype Category_var_drug | \n",
+ " Significance | \n",
+ " Notes | \n",
+ " Sentence | \n",
+ " Alleles | \n",
+ " Specialty Population | \n",
+ " Metabolizer types | \n",
+ " isPlural | \n",
+ " Is/Is Not associated | \n",
+ " Direction of effect | \n",
+ " PD/PK terms | \n",
+ " Multiple drugs And/or | \n",
+ " Population types | \n",
+ " Population Phenotypes or diseases | \n",
+ " Multiple phenotypes or diseases And/or | \n",
+ " Comparison Allele(s) or Genotype(s) | \n",
+ " Comparison Metabolizer types | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 981755665 | \n",
+ " Variant Drug Annotation | \n",
+ " 21083385 | \n",
+ " Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 981755665 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 21083385 | \n",
+ " Efficacy | \n",
+ " not stated | \n",
+ " Clinical trials were carried out to test efficacy of ivacaftor selecting only patients with the CFTR G551D mutation on at least one allele (genotype AA or AG). | \n",
+ " Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " AA + AG | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Are | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 981755678 | \n",
+ " Variant Drug Annotation | \n",
+ " 22047557 | \n",
+ " Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 981755678 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 22047557 | \n",
+ " Efficacy | \n",
+ " not stated | \n",
+ " A clinical trial that selected patients with the G551D CFTR mutation (rs75527207 genotype AA or AG). Patients without this mutation were excluded. One patient included in the placebo group was homozygous for F508del (rs113993960 genotype del/del). | \n",
+ " Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " AA + AG | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Are | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 982009991 | \n",
+ " Variant Drug Annotation | \n",
+ " 23590265 | \n",
+ " Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. | \n",
+ " 982009991 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 23590265 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " Patients aged 6-11 at time of screening who had at least one allele with the G551D mutation (allele A at position rs75527207) were recruited for this trial. Ivacaftor is only indicated in CF patients with this mutation. Significant improvements in lung function were seen in the ivacaftor treatment group compared to placebo. | \n",
+ " Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in children with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1183629335 | \n",
+ " Variant Drug Annotation | \n",
+ " 24066763 | \n",
+ " Genotype AA is associated with response to ivacaftor in women with Cystic Fibrosis. | \n",
+ " 1183629335 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 24066763 | \n",
+ " Efficacy | \n",
+ " not stated | \n",
+ " Case report of a female homozygous for the G551D CFTR mutation (genotype AA) in which ivacaftor was efficacious: increased absolute change in percent of predicted FEV1, increased weight and walk distance and decreased sweat chloride levels over a 12 month course with no sign of plateau to date. | \n",
+ " Genotype AA is associated with response to ivacaftor in women with Cystic Fibrosis. | \n",
+ " AA | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in women with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1448423752 | \n",
+ " Variant Drug Annotation | \n",
+ " 27773592 | \n",
+ " Genotypes AA + AG is associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. | \n",
+ " 1448423752 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 27773592 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " The outcome of change in sweat chloride was correlated with change in FEV1 in patients with cystic fibrosis and found to have improved results for both. | \n",
+ " Genotypes AA + AG is associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. | \n",
+ " AA + AG | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " GG | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449191908 | \n",
+ " Variant Drug Annotation | \n",
+ " 25682022 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1449191908 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 25682022 | \n",
+ " Efficacy | \n",
+ " not stated | \n",
+ " Study was an expanded access program targeted at patients with severe lung disease and was not powered to determine efficacy. Majority of patients reported an improvement in FEV following 24 weeks of treatment. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449192055 | \n",
+ " Variant Drug Annotation | \n",
+ " 28711222 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1449192055 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 28711222 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " G551D allele. Statistically significant increases in FEV1, weight and BMI and statistically significant decreases in sweat chloride level, the number of days of antibiotic treatment and in the use of some maintenance treatments.; No differences in bone density, pancreatic insufficiency and cystic fibrosis related diabetes were observed. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449192093 | \n",
+ " Variant Drug Annotation | \n",
+ " 25311995 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1449192093 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 25311995 | \n",
+ " Efficacy | \n",
+ " not stated | \n",
+ " G551 D allele. Increases in FEV1, body weight, CFQ-R scores and time to first pulmonary exacerbation were observed. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449192439 | \n",
+ " Variant Drug Annotation | \n",
+ " 28611235 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1449192439 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 28611235 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " G551D allele. FEV1, Alfred wellness score, exercise time, CFQ-R score and sweat chloride levels showed a significant improvement following ivacaftor treatment as compared to placebo while other outcomes (VO2, ventilation, cardiac response nd recovery following exercise) did not. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449192481 | \n",
+ " Variant Drug Annotation | \n",
+ " 26135562 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1449192481 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 26135562 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " G551D allele. Analysis of CFQ-R scores from participants in the STRIVE trial. Scores for eating problems, health perceptions, physical functioning, respiratory symptoms, social functioning, treatment burden and vitality showed significant improvements following ivacaftor treatment. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449192494 | \n",
+ " Variant Drug Annotation | \n",
+ " 25171465 | \n",
+ " Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. | \n",
+ " 1449192494 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 25171465 | \n",
+ " Efficacy | \n",
+ " not stated | \n",
+ " Case study of a pediatric cystic fibrosis patient. Improvements in sweat chloride, BMI, bronchiectasis and lung function reported following ivacaftor treatment. | \n",
+ " Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in children with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449192576 | \n",
+ " Variant Drug Annotation | \n",
+ " 25755212 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1449192576 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 25755212 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " Post hoc analysis of clinical outcomes of the STRIVE and ENVISION trials. Participants were split into tertiles based on FEV1 score and outcomes in change in baseline FEV1, body weight, CFQ-R score and sweat chloride levels as well as number of days of pulmonary exacerbation were assessed. All outcomes were significantly improved in the upper tertile, all outcomes apart from number of days of pulmonary exacerbation were significantly improved in the middle tertile and absolute change in FEV1, body weight and sweat chloride levels were significantly improved in the lower tertile. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449192615 | \n",
+ " Variant Drug Annotation | \n",
+ " 26568242 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1449192615 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 26568242 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " Response measured by changes in sweat chloride levels, FEV1 and BMI. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449192709 | \n",
+ " Variant Drug Annotation | \n",
+ " 25473543 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1449192709 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 25473543 | \n",
+ " Efficacy | \n",
+ " not stated | \n",
+ " G551D allele. Case report of three patients with the F508del/G551D genotype. Reported improvements in FEV1, body weight, sweat chloride levels and scores in the respiratory domain of the CFQ-R. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449192721 | \n",
+ " Variant Drug Annotation | \n",
+ " 25145599 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1449192721 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 25145599 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " G551D allele. Significant increases in %FVC and %FEV1 compared to baseline were seen at 6 months of ivacaftor treatment, but both measures declined to baseline by 12 months of ivacaftor treatment. Significant improvements in BMI, body weight, sinus disease status and sweat chloride levels were seen at 12 months of ivacaftor treatment. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1450043422 | \n",
+ " Variant Drug Annotation | \n",
+ " 23628510 | \n",
+ " Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. | \n",
+ " 1450043422 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 23628510 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " NaN | \n",
+ " Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in children with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1184512440 | \n",
+ " Variant Drug Annotation | \n",
+ " 25049054 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1184512440 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 25049054 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " Patients with at least one G551D-CFTR allele were recruited and treated with ivacaftor for one year. Mean weight and BMI improved at 6 months from baseline, but only mean weight was increased again at 12 months. Mean percentage FVC, FEV1 and FEF25-75% returned to baseline levels by 12 months of treatment. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 981755746 | \n",
+ " Variant Drug Annotation | \n",
+ " 22942289 | \n",
+ " Allele A is associated with increased response to ivacaftor. | \n",
+ " 981755746 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 22942289 | \n",
+ " Efficacy | \n",
+ " not stated | \n",
+ " In vitro studies using proteoliposomes containing CFTR, or CFTR with the G551D mutation (rs75527207 allele A), or CFTR with the F508del mutation (rs113993960 allele del). Ivacaftor in the presence of ATP potentiated channel activity of CFTR-G551D. | \n",
+ " Allele A is associated with increased response to ivacaftor. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " response to | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 981755699 | \n",
+ " Variant Drug Annotation | \n",
+ " 19846789 | \n",
+ " Allele A is associated with increased response to ivacaftor. | \n",
+ " 981755699 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 19846789 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " In vitro assays that show ivacaftor potentiates CFTR with the G551D mutation (rs75527207 allele A) - see details described in study parameters. | \n",
+ " Allele A is associated with increased response to ivacaftor. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " response to | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 981755787 | \n",
+ " Variant Drug Annotation | \n",
+ " 22293084 | \n",
+ " Allele A is associated with increased response to ivacaftor. | \n",
+ " 981755787 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 22293084 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " as compared to baseline. In vitro assays using transfected Fisher Rat Thyroid cells expressing CFTR. Before treatment, cells were activated by exposure to PKA and ATP before ivacaftor treatment. In vitro assays using transfected Fisher Rat Thyroid cells expressing CFTR. Cells expressing G551D-CFTR (rs75527207 allele A) responded to ivacaftor treatment with a significantly enhanced channel open probability and increased chloride transport. Single channel current amplitude at 80mV was not significantly enhanced. | \n",
+ " Allele A is associated with increased response to ivacaftor. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " response to | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1043737597 | \n",
+ " Variant Drug Annotation | \n",
+ " 23757359 | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1043737597 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 23757359 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " A retrospective study of patients in Germany with severe Cystic Fibrosis (FEV1 <40%predicted) with the G551D mutation who were treated with ivacaftor. On average, FEV1and body weight increased significantly, though response was variable in this patient group and several patients discontinued ivacaftor for different complications. | \n",
+ " Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 982006840 | \n",
+ " Variant Drug Annotation | \n",
+ " 23313410 | \n",
+ " Allele A is associated with response to ivacaftor in men with Cystic Fibrosis. | \n",
+ " 982006840 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 23313410 | \n",
+ " Efficacy | \n",
+ " not stated | \n",
+ " A case report of lung function improvements 6 months after treatment with ivacaftor in a male patient with severe lung disease - he had the CFTR G511D (rsrs75527207 allele A)/deltaF508 genotype (rs113993960 del CTT) and so could be given ivacaftor. | \n",
+ " Allele A is associated with response to ivacaftor in men with Cystic Fibrosis. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in men with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1446903789 | \n",
+ " Variant Drug Annotation | \n",
+ " 24461666 | \n",
+ " Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1446903789 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 24461666 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " The authors wanted to assess the efficacy of ivacaftor in patients with cystic fibrosis who have normal spirometry. The authors assessed lung function improvement in patients using lung clearance index (LCI) as well as forced expiratory volume in 1 second (FEV1), and only included patients with < 90% FEV1 values. The primary outcome was change in LCI from baseline. This was a phase 2, multi-centre, placebo-controlled, 2x2 crossover study. One group, sequence 1, took placebo first, followed by 28 day washout, then took ivacaftor 150 mg 2x daily for 4 weeks. The second group had the sequence of treatment reversed. | \n",
+ " Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. | \n",
+ " AA + AG | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Are | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1448099051 | \n",
+ " Variant Drug Annotation | \n",
+ " 27158673 | \n",
+ " Genotypes AA + AG are associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. | \n",
+ " 1448099051 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 27158673 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " Measured in adult patients, with changes in lung volume, sweat chloride, distensibility, wall thickness, expiratory lumen area, and inspiratory lumen area measured before starting ivacaftor and 48 hour after starting ivacaftor. | \n",
+ " Genotypes AA + AG are associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. | \n",
+ " AA + AG | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Are | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " response to | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " GG | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Clinical Annotation ID Variant/Haplotypes Gene Level of Evidence \\\n",
+ "0 981755803 rs75527207 CFTR 1A \n",
+ "1 981755803 rs75527207 CFTR 1A \n",
+ "2 981755803 rs75527207 CFTR 1A \n",
+ "3 981755803 rs75527207 CFTR 1A \n",
+ "4 981755803 rs75527207 CFTR 1A \n",
+ "5 981755803 rs75527207 CFTR 1A \n",
+ "6 981755803 rs75527207 CFTR 1A \n",
+ "7 981755803 rs75527207 CFTR 1A \n",
+ "8 981755803 rs75527207 CFTR 1A \n",
+ "9 981755803 rs75527207 CFTR 1A \n",
+ "10 981755803 rs75527207 CFTR 1A \n",
+ "11 981755803 rs75527207 CFTR 1A \n",
+ "12 981755803 rs75527207 CFTR 1A \n",
+ "13 981755803 rs75527207 CFTR 1A \n",
+ "14 981755803 rs75527207 CFTR 1A \n",
+ "15 981755803 rs75527207 CFTR 1A \n",
+ "16 981755803 rs75527207 CFTR 1A \n",
+ "17 981755803 rs75527207 CFTR 1A \n",
+ "18 981755803 rs75527207 CFTR 1A \n",
+ "19 981755803 rs75527207 CFTR 1A \n",
+ "20 981755803 rs75527207 CFTR 1A \n",
+ "21 981755803 rs75527207 CFTR 1A \n",
+ "22 981755803 rs75527207 CFTR 1A \n",
+ "23 981755803 rs75527207 CFTR 1A \n",
+ "\n",
+ " Phenotype Category Drug(s) Phenotype(s) Evidence ID \\\n",
+ "0 Efficacy ivacaftor Cystic Fibrosis 981755665 \n",
+ "1 Efficacy ivacaftor Cystic Fibrosis 981755678 \n",
+ "2 Efficacy ivacaftor Cystic Fibrosis 982009991 \n",
+ "3 Efficacy ivacaftor Cystic Fibrosis 1183629335 \n",
+ "4 Efficacy ivacaftor Cystic Fibrosis 1448423752 \n",
+ "5 Efficacy ivacaftor Cystic Fibrosis 1449191908 \n",
+ "6 Efficacy ivacaftor Cystic Fibrosis 1449192055 \n",
+ "7 Efficacy ivacaftor Cystic Fibrosis 1449192093 \n",
+ "8 Efficacy ivacaftor Cystic Fibrosis 1449192439 \n",
+ "9 Efficacy ivacaftor Cystic Fibrosis 1449192481 \n",
+ "10 Efficacy ivacaftor Cystic Fibrosis 1449192494 \n",
+ "11 Efficacy ivacaftor Cystic Fibrosis 1449192576 \n",
+ "12 Efficacy ivacaftor Cystic Fibrosis 1449192615 \n",
+ "13 Efficacy ivacaftor Cystic Fibrosis 1449192709 \n",
+ "14 Efficacy ivacaftor Cystic Fibrosis 1449192721 \n",
+ "15 Efficacy ivacaftor Cystic Fibrosis 1450043422 \n",
+ "16 Efficacy ivacaftor Cystic Fibrosis 1184512440 \n",
+ "17 Efficacy ivacaftor Cystic Fibrosis 981755746 \n",
+ "18 Efficacy ivacaftor Cystic Fibrosis 981755699 \n",
+ "19 Efficacy ivacaftor Cystic Fibrosis 981755787 \n",
+ "20 Efficacy ivacaftor Cystic Fibrosis 1043737597 \n",
+ "21 Efficacy ivacaftor Cystic Fibrosis 982006840 \n",
+ "22 Efficacy ivacaftor Cystic Fibrosis 1446903789 \n",
+ "23 Efficacy ivacaftor Cystic Fibrosis 1448099051 \n",
+ "\n",
+ " Evidence Type PMID \\\n",
+ "0 Variant Drug Annotation 21083385 \n",
+ "1 Variant Drug Annotation 22047557 \n",
+ "2 Variant Drug Annotation 23590265 \n",
+ "3 Variant Drug Annotation 24066763 \n",
+ "4 Variant Drug Annotation 27773592 \n",
+ "5 Variant Drug Annotation 25682022 \n",
+ "6 Variant Drug Annotation 28711222 \n",
+ "7 Variant Drug Annotation 25311995 \n",
+ "8 Variant Drug Annotation 28611235 \n",
+ "9 Variant Drug Annotation 26135562 \n",
+ "10 Variant Drug Annotation 25171465 \n",
+ "11 Variant Drug Annotation 25755212 \n",
+ "12 Variant Drug Annotation 26568242 \n",
+ "13 Variant Drug Annotation 25473543 \n",
+ "14 Variant Drug Annotation 25145599 \n",
+ "15 Variant Drug Annotation 23628510 \n",
+ "16 Variant Drug Annotation 25049054 \n",
+ "17 Variant Drug Annotation 22942289 \n",
+ "18 Variant Drug Annotation 19846789 \n",
+ "19 Variant Drug Annotation 22293084 \n",
+ "20 Variant Drug Annotation 23757359 \n",
+ "21 Variant Drug Annotation 23313410 \n",
+ "22 Variant Drug Annotation 24461666 \n",
+ "23 Variant Drug Annotation 27158673 \n",
+ "\n",
+ " Summary \\\n",
+ "0 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "1 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "2 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n",
+ "3 Genotype AA is associated with response to ivacaftor in women with Cystic Fibrosis. \n",
+ "4 Genotypes AA + AG is associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. \n",
+ "5 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "6 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "7 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "8 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "9 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "10 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n",
+ "11 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "12 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "13 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "14 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "15 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n",
+ "16 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "17 Allele A is associated with increased response to ivacaftor. \n",
+ "18 Allele A is associated with increased response to ivacaftor. \n",
+ "19 Allele A is associated with increased response to ivacaftor. \n",
+ "20 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "21 Allele A is associated with response to ivacaftor in men with Cystic Fibrosis. \n",
+ "22 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "23 Genotypes AA + AG are associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. \n",
+ "\n",
+ " Variant Annotation ID Variant/Haplotypes_var_drug Gene_var_drug \\\n",
+ "0 981755665 rs75527207 CFTR \n",
+ "1 981755678 rs75527207 CFTR \n",
+ "2 982009991 rs75527207 CFTR \n",
+ "3 1183629335 rs75527207 CFTR \n",
+ "4 1448423752 rs75527207 CFTR \n",
+ "5 1449191908 rs75527207 CFTR \n",
+ "6 1449192055 rs75527207 CFTR \n",
+ "7 1449192093 rs75527207 CFTR \n",
+ "8 1449192439 rs75527207 CFTR \n",
+ "9 1449192481 rs75527207 CFTR \n",
+ "10 1449192494 rs75527207 CFTR \n",
+ "11 1449192576 rs75527207 CFTR \n",
+ "12 1449192615 rs75527207 CFTR \n",
+ "13 1449192709 rs75527207 CFTR \n",
+ "14 1449192721 rs75527207 CFTR \n",
+ "15 1450043422 rs75527207 CFTR \n",
+ "16 1184512440 rs75527207 CFTR \n",
+ "17 981755746 rs75527207 CFTR \n",
+ "18 981755699 rs75527207 CFTR \n",
+ "19 981755787 rs75527207 CFTR \n",
+ "20 1043737597 rs75527207 CFTR \n",
+ "21 982006840 rs75527207 CFTR \n",
+ "22 1446903789 rs75527207 CFTR \n",
+ "23 1448099051 rs75527207 CFTR \n",
+ "\n",
+ " Drug(s)_var_drug PMID_var_drug Phenotype Category_var_drug Significance \\\n",
+ "0 ivacaftor 21083385 Efficacy not stated \n",
+ "1 ivacaftor 22047557 Efficacy not stated \n",
+ "2 ivacaftor 23590265 Efficacy yes \n",
+ "3 ivacaftor 24066763 Efficacy not stated \n",
+ "4 ivacaftor 27773592 Efficacy yes \n",
+ "5 ivacaftor 25682022 Efficacy not stated \n",
+ "6 ivacaftor 28711222 Efficacy yes \n",
+ "7 ivacaftor 25311995 Efficacy not stated \n",
+ "8 ivacaftor 28611235 Efficacy yes \n",
+ "9 ivacaftor 26135562 Efficacy yes \n",
+ "10 ivacaftor 25171465 Efficacy not stated \n",
+ "11 ivacaftor 25755212 Efficacy yes \n",
+ "12 ivacaftor 26568242 Efficacy yes \n",
+ "13 ivacaftor 25473543 Efficacy not stated \n",
+ "14 ivacaftor 25145599 Efficacy yes \n",
+ "15 ivacaftor 23628510 Efficacy yes \n",
+ "16 ivacaftor 25049054 Efficacy yes \n",
+ "17 ivacaftor 22942289 Efficacy not stated \n",
+ "18 ivacaftor 19846789 Efficacy yes \n",
+ "19 ivacaftor 22293084 Efficacy yes \n",
+ "20 ivacaftor 23757359 Efficacy yes \n",
+ "21 ivacaftor 23313410 Efficacy not stated \n",
+ "22 ivacaftor 24461666 Efficacy yes \n",
+ "23 ivacaftor 27158673 Efficacy yes \n",
+ "\n",
+ " Notes \\\n",
+ "0 Clinical trials were carried out to test efficacy of ivacaftor selecting only patients with the CFTR G551D mutation on at least one allele (genotype AA or AG). \n",
+ "1 A clinical trial that selected patients with the G551D CFTR mutation (rs75527207 genotype AA or AG). Patients without this mutation were excluded. One patient included in the placebo group was homozygous for F508del (rs113993960 genotype del/del). \n",
+ "2 Patients aged 6-11 at time of screening who had at least one allele with the G551D mutation (allele A at position rs75527207) were recruited for this trial. Ivacaftor is only indicated in CF patients with this mutation. Significant improvements in lung function were seen in the ivacaftor treatment group compared to placebo. \n",
+ "3 Case report of a female homozygous for the G551D CFTR mutation (genotype AA) in which ivacaftor was efficacious: increased absolute change in percent of predicted FEV1, increased weight and walk distance and decreased sweat chloride levels over a 12 month course with no sign of plateau to date. \n",
+ "4 The outcome of change in sweat chloride was correlated with change in FEV1 in patients with cystic fibrosis and found to have improved results for both. \n",
+ "5 Study was an expanded access program targeted at patients with severe lung disease and was not powered to determine efficacy. Majority of patients reported an improvement in FEV following 24 weeks of treatment. \n",
+ "6 G551D allele. Statistically significant increases in FEV1, weight and BMI and statistically significant decreases in sweat chloride level, the number of days of antibiotic treatment and in the use of some maintenance treatments.; No differences in bone density, pancreatic insufficiency and cystic fibrosis related diabetes were observed. \n",
+ "7 G551 D allele. Increases in FEV1, body weight, CFQ-R scores and time to first pulmonary exacerbation were observed. \n",
+ "8 G551D allele. FEV1, Alfred wellness score, exercise time, CFQ-R score and sweat chloride levels showed a significant improvement following ivacaftor treatment as compared to placebo while other outcomes (VO2, ventilation, cardiac response nd recovery following exercise) did not. \n",
+ "9 G551D allele. Analysis of CFQ-R scores from participants in the STRIVE trial. Scores for eating problems, health perceptions, physical functioning, respiratory symptoms, social functioning, treatment burden and vitality showed significant improvements following ivacaftor treatment. \n",
+ "10 Case study of a pediatric cystic fibrosis patient. Improvements in sweat chloride, BMI, bronchiectasis and lung function reported following ivacaftor treatment. \n",
+ "11 Post hoc analysis of clinical outcomes of the STRIVE and ENVISION trials. Participants were split into tertiles based on FEV1 score and outcomes in change in baseline FEV1, body weight, CFQ-R score and sweat chloride levels as well as number of days of pulmonary exacerbation were assessed. All outcomes were significantly improved in the upper tertile, all outcomes apart from number of days of pulmonary exacerbation were significantly improved in the middle tertile and absolute change in FEV1, body weight and sweat chloride levels were significantly improved in the lower tertile. \n",
+ "12 Response measured by changes in sweat chloride levels, FEV1 and BMI. \n",
+ "13 G551D allele. Case report of three patients with the F508del/G551D genotype. Reported improvements in FEV1, body weight, sweat chloride levels and scores in the respiratory domain of the CFQ-R. \n",
+ "14 G551D allele. Significant increases in %FVC and %FEV1 compared to baseline were seen at 6 months of ivacaftor treatment, but both measures declined to baseline by 12 months of ivacaftor treatment. Significant improvements in BMI, body weight, sinus disease status and sweat chloride levels were seen at 12 months of ivacaftor treatment. \n",
+ "15 NaN \n",
+ "16 Patients with at least one G551D-CFTR allele were recruited and treated with ivacaftor for one year. Mean weight and BMI improved at 6 months from baseline, but only mean weight was increased again at 12 months. Mean percentage FVC, FEV1 and FEF25-75% returned to baseline levels by 12 months of treatment. \n",
+ "17 In vitro studies using proteoliposomes containing CFTR, or CFTR with the G551D mutation (rs75527207 allele A), or CFTR with the F508del mutation (rs113993960 allele del). Ivacaftor in the presence of ATP potentiated channel activity of CFTR-G551D. \n",
+ "18 In vitro assays that show ivacaftor potentiates CFTR with the G551D mutation (rs75527207 allele A) - see details described in study parameters. \n",
+ "19 as compared to baseline. In vitro assays using transfected Fisher Rat Thyroid cells expressing CFTR. Before treatment, cells were activated by exposure to PKA and ATP before ivacaftor treatment. In vitro assays using transfected Fisher Rat Thyroid cells expressing CFTR. Cells expressing G551D-CFTR (rs75527207 allele A) responded to ivacaftor treatment with a significantly enhanced channel open probability and increased chloride transport. Single channel current amplitude at 80mV was not significantly enhanced. \n",
+ "20 A retrospective study of patients in Germany with severe Cystic Fibrosis (FEV1 <40%predicted) with the G551D mutation who were treated with ivacaftor. On average, FEV1and body weight increased significantly, though response was variable in this patient group and several patients discontinued ivacaftor for different complications. \n",
+ "21 A case report of lung function improvements 6 months after treatment with ivacaftor in a male patient with severe lung disease - he had the CFTR G511D (rsrs75527207 allele A)/deltaF508 genotype (rs113993960 del CTT) and so could be given ivacaftor. \n",
+ "22 The authors wanted to assess the efficacy of ivacaftor in patients with cystic fibrosis who have normal spirometry. The authors assessed lung function improvement in patients using lung clearance index (LCI) as well as forced expiratory volume in 1 second (FEV1), and only included patients with < 90% FEV1 values. The primary outcome was change in LCI from baseline. This was a phase 2, multi-centre, placebo-controlled, 2x2 crossover study. One group, sequence 1, took placebo first, followed by 28 day washout, then took ivacaftor 150 mg 2x daily for 4 weeks. The second group had the sequence of treatment reversed. \n",
+ "23 Measured in adult patients, with changes in lung volume, sweat chloride, distensibility, wall thickness, expiratory lumen area, and inspiratory lumen area measured before starting ivacaftor and 48 hour after starting ivacaftor. \n",
+ "\n",
+ " Sentence \\\n",
+ "0 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "1 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "2 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n",
+ "3 Genotype AA is associated with response to ivacaftor in women with Cystic Fibrosis. \n",
+ "4 Genotypes AA + AG is associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. \n",
+ "5 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "6 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "7 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "8 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "9 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "10 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n",
+ "11 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "12 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "13 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "14 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "15 Allele A is associated with response to ivacaftor in children with Cystic Fibrosis. \n",
+ "16 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "17 Allele A is associated with increased response to ivacaftor. \n",
+ "18 Allele A is associated with increased response to ivacaftor. \n",
+ "19 Allele A is associated with increased response to ivacaftor. \n",
+ "20 Allele A is associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "21 Allele A is associated with response to ivacaftor in men with Cystic Fibrosis. \n",
+ "22 Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis. \n",
+ "23 Genotypes AA + AG are associated with increased response to ivacaftor in people with Cystic Fibrosis as compared to genotype GG. \n",
+ "\n",
+ " Alleles Specialty Population Metabolizer types isPlural \\\n",
+ "0 AA + AG NaN NaN Are \n",
+ "1 AA + AG NaN NaN Are \n",
+ "2 A Pediatric NaN Is \n",
+ "3 AA NaN NaN Is \n",
+ "4 AA + AG Pediatric NaN Is \n",
+ "5 A Pediatric NaN Is \n",
+ "6 A Pediatric NaN Is \n",
+ "7 A Pediatric NaN Is \n",
+ "8 A NaN NaN Is \n",
+ "9 A Pediatric NaN Is \n",
+ "10 A Pediatric NaN Is \n",
+ "11 A Pediatric NaN Is \n",
+ "12 A Pediatric NaN Is \n",
+ "13 A NaN NaN Is \n",
+ "14 A Pediatric NaN Is \n",
+ "15 A Pediatric NaN Is \n",
+ "16 A NaN NaN Is \n",
+ "17 A NaN NaN Is \n",
+ "18 A NaN NaN Is \n",
+ "19 A NaN NaN Is \n",
+ "20 A NaN NaN Is \n",
+ "21 A NaN NaN Is \n",
+ "22 AA + AG Pediatric NaN Are \n",
+ "23 AA + AG NaN NaN Are \n",
+ "\n",
+ " Is/Is Not associated Direction of effect PD/PK terms \\\n",
+ "0 Associated with NaN response to \n",
+ "1 Associated with NaN response to \n",
+ "2 Associated with NaN response to \n",
+ "3 Associated with NaN response to \n",
+ "4 Associated with increased response to \n",
+ "5 Associated with NaN response to \n",
+ "6 Associated with NaN response to \n",
+ "7 Associated with NaN response to \n",
+ "8 Associated with NaN response to \n",
+ "9 Associated with NaN response to \n",
+ "10 Associated with NaN response to \n",
+ "11 Associated with NaN response to \n",
+ "12 Associated with NaN response to \n",
+ "13 Associated with NaN response to \n",
+ "14 Associated with NaN response to \n",
+ "15 Associated with NaN response to \n",
+ "16 Associated with NaN response to \n",
+ "17 Associated with increased response to \n",
+ "18 Associated with increased response to \n",
+ "19 Associated with increased response to \n",
+ "20 Associated with NaN response to \n",
+ "21 Associated with NaN response to \n",
+ "22 Associated with NaN response to \n",
+ "23 Associated with increased response to \n",
+ "\n",
+ " Multiple drugs And/or Population types Population Phenotypes or diseases \\\n",
+ "0 NaN in people with Disease:Cystic Fibrosis \n",
+ "1 NaN in people with Disease:Cystic Fibrosis \n",
+ "2 NaN in children with Disease:Cystic Fibrosis \n",
+ "3 NaN in women with Disease:Cystic Fibrosis \n",
+ "4 NaN in people with Disease:Cystic Fibrosis \n",
+ "5 NaN in people with Disease:Cystic Fibrosis \n",
+ "6 NaN in people with Disease:Cystic Fibrosis \n",
+ "7 NaN in people with Disease:Cystic Fibrosis \n",
+ "8 NaN in people with Disease:Cystic Fibrosis \n",
+ "9 NaN in people with Disease:Cystic Fibrosis \n",
+ "10 NaN in children with Disease:Cystic Fibrosis \n",
+ "11 NaN in people with Disease:Cystic Fibrosis \n",
+ "12 NaN in people with Disease:Cystic Fibrosis \n",
+ "13 NaN in people with Disease:Cystic Fibrosis \n",
+ "14 NaN in people with Disease:Cystic Fibrosis \n",
+ "15 NaN in children with Disease:Cystic Fibrosis \n",
+ "16 NaN in people with Disease:Cystic Fibrosis \n",
+ "17 NaN NaN NaN \n",
+ "18 NaN NaN NaN \n",
+ "19 NaN NaN NaN \n",
+ "20 NaN in people with Disease:Cystic Fibrosis \n",
+ "21 NaN in men with Disease:Cystic Fibrosis \n",
+ "22 NaN in people with Disease:Cystic Fibrosis \n",
+ "23 NaN in people with Disease:Cystic Fibrosis \n",
+ "\n",
+ " Multiple phenotypes or diseases And/or Comparison Allele(s) or Genotype(s) \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN GG \n",
+ "5 NaN NaN \n",
+ "6 NaN NaN \n",
+ "7 NaN NaN \n",
+ "8 NaN NaN \n",
+ "9 NaN NaN \n",
+ "10 NaN NaN \n",
+ "11 NaN NaN \n",
+ "12 NaN NaN \n",
+ "13 NaN NaN \n",
+ "14 NaN NaN \n",
+ "15 NaN NaN \n",
+ "16 NaN NaN \n",
+ "17 NaN NaN \n",
+ "18 NaN NaN \n",
+ "19 NaN NaN \n",
+ "20 NaN NaN \n",
+ "21 NaN NaN \n",
+ "22 NaN NaN \n",
+ "23 NaN GG \n",
+ "\n",
+ " Comparison Metabolizer types \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "5 NaN \n",
+ "6 NaN \n",
+ "7 NaN \n",
+ "8 NaN \n",
+ "9 NaN \n",
+ "10 NaN \n",
+ "11 NaN \n",
+ "12 NaN \n",
+ "13 NaN \n",
+ "14 NaN \n",
+ "15 NaN \n",
+ "16 NaN \n",
+ "17 NaN \n",
+ "18 NaN \n",
+ "19 NaN \n",
+ "20 NaN \n",
+ "21 NaN \n",
+ "22 NaN \n",
+ "23 NaN "
+ ]
+ },
+ "execution_count": 248,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_981755803_drug"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 138,
+ "id": "86225d03-93ea-4d22-8569-0bb1360f4d66",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Clinical Annotation ID | \n",
+ " Variant/Haplotypes | \n",
+ " Gene | \n",
+ " Level of Evidence | \n",
+ " Phenotype Category | \n",
+ " Drug(s) | \n",
+ " Phenotype(s) | \n",
+ " Evidence ID | \n",
+ " Evidence Type | \n",
+ " PMID | \n",
+ " Summary | \n",
+ " Variant Annotation ID | \n",
+ " Variant/Haplotypes_var_fa | \n",
+ " Gene_var_fa | \n",
+ " Drug(s)_var_fa | \n",
+ " PMID_var_fa | \n",
+ " Phenotype Category_var_fa | \n",
+ " Significance | \n",
+ " Notes | \n",
+ " Sentence | \n",
+ " Alleles | \n",
+ " Specialty Population | \n",
+ " Assay type | \n",
+ " Metabolizer types | \n",
+ " isPlural | \n",
+ " Is/Is Not associated | \n",
+ " Direction of effect | \n",
+ " Functional terms | \n",
+ " Gene/gene product | \n",
+ " When treated with/exposed to/when assayed with | \n",
+ " Multiple drugs And/or | \n",
+ " Cell type | \n",
+ " Comparison Allele(s) or Genotype(s) | \n",
+ " Comparison Metabolizer types | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1043737620 | \n",
+ " Variant Functional Assay Annotation | \n",
+ " 23757361 | \n",
+ " Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells. | \n",
+ " 1043737620 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 23757361 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " compared to no treatment. Ivacaftor stimulated CFTR activity in CFTR-G551D expressing CHO cells (as measured by iodine efflux). | \n",
+ " Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " activity of | \n",
+ " CFTR | \n",
+ " when treated with | \n",
+ " NaN | \n",
+ " in transfected CHO cells | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1043737636 | \n",
+ " Variant Functional Assay Annotation | \n",
+ " 23891399 | \n",
+ " Allele A is associated with activity of CFTR when treated with ivacaftor in FRT cell lines. | \n",
+ " 1043737636 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 23891399 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " G551D allele. 55.3 fold increase in chloride transport upon ivacaftor treatment as compared to baseline (no ivacaftor treatment). | \n",
+ " Allele A is associated with activity of CFTR when treated with ivacaftor in FRT cell lines. | \n",
+ " A | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " NaN | \n",
+ " activity of | \n",
+ " CFTR | \n",
+ " when treated with | \n",
+ " NaN | \n",
+ " in FRT cell lines | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Clinical Annotation ID Variant/Haplotypes Gene Level of Evidence \\\n",
+ "0 981755803 rs75527207 CFTR 1A \n",
+ "1 981755803 rs75527207 CFTR 1A \n",
+ "\n",
+ " Phenotype Category Drug(s) Phenotype(s) Evidence ID \\\n",
+ "0 Efficacy ivacaftor Cystic Fibrosis 1043737620 \n",
+ "1 Efficacy ivacaftor Cystic Fibrosis 1043737636 \n",
+ "\n",
+ " Evidence Type PMID \\\n",
+ "0 Variant Functional Assay Annotation 23757361 \n",
+ "1 Variant Functional Assay Annotation 23891399 \n",
+ "\n",
+ " Summary \\\n",
+ "0 Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells. \n",
+ "1 Allele A is associated with activity of CFTR when treated with ivacaftor in FRT cell lines. \n",
+ "\n",
+ " Variant Annotation ID Variant/Haplotypes_var_fa Gene_var_fa Drug(s)_var_fa \\\n",
+ "0 1043737620 rs75527207 CFTR ivacaftor \n",
+ "1 1043737636 rs75527207 CFTR ivacaftor \n",
+ "\n",
+ " PMID_var_fa Phenotype Category_var_fa Significance \\\n",
+ "0 23757361 Efficacy yes \n",
+ "1 23891399 Efficacy yes \n",
+ "\n",
+ " Notes \\\n",
+ "0 compared to no treatment. Ivacaftor stimulated CFTR activity in CFTR-G551D expressing CHO cells (as measured by iodine efflux). \n",
+ "1 G551D allele. 55.3 fold increase in chloride transport upon ivacaftor treatment as compared to baseline (no ivacaftor treatment). \n",
+ "\n",
+ " Sentence \\\n",
+ "0 Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells. \n",
+ "1 Allele A is associated with activity of CFTR when treated with ivacaftor in FRT cell lines. \n",
+ "\n",
+ " Alleles Specialty Population Assay type Metabolizer types isPlural \\\n",
+ "0 A NaN NaN NaN Is \n",
+ "1 A NaN NaN NaN Is \n",
+ "\n",
+ " Is/Is Not associated Direction of effect Functional terms Gene/gene product \\\n",
+ "0 Associated with increased activity of CFTR \n",
+ "1 Associated with NaN activity of CFTR \n",
+ "\n",
+ " When treated with/exposed to/when assayed with Multiple drugs And/or \\\n",
+ "0 when treated with NaN \n",
+ "1 when treated with NaN \n",
+ "\n",
+ " Cell type Comparison Allele(s) or Genotype(s) \\\n",
+ "0 in transfected CHO cells NaN \n",
+ "1 in FRT cell lines NaN \n",
+ "\n",
+ " Comparison Metabolizer types \n",
+ "0 NaN \n",
+ "1 NaN "
+ ]
+ },
+ "execution_count": 138,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_981755803_fa"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "id": "49c52d4d-8c99-4788-a922-310ac566b4f6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Clinical Annotation ID | \n",
+ " Variant/Haplotypes | \n",
+ " Gene | \n",
+ " Level of Evidence | \n",
+ " Phenotype Category | \n",
+ " Drug(s) | \n",
+ " Phenotype(s) | \n",
+ " Evidence ID | \n",
+ " Evidence Type | \n",
+ " PMID | \n",
+ " Summary | \n",
+ " Variant Annotation ID | \n",
+ " Variant/Haplotypes_var_pheno | \n",
+ " Gene_var_pheno | \n",
+ " Drug(s)_var_pheno | \n",
+ " PMID_var_pheno | \n",
+ " Phenotype Category_var_pheno | \n",
+ " Significance | \n",
+ " Notes | \n",
+ " Sentence | \n",
+ " Alleles | \n",
+ " Specialty Population | \n",
+ " Metabolizer types | \n",
+ " isPlural | \n",
+ " Is/Is Not associated | \n",
+ " Direction of effect | \n",
+ " Side effect/efficacy/other | \n",
+ " Phenotype | \n",
+ " Multiple phenotypes And/or | \n",
+ " When treated with/exposed to/when assayed with | \n",
+ " Multiple drugs And/or | \n",
+ " Population types | \n",
+ " Population Phenotypes or diseases | \n",
+ " Multiple phenotypes or diseases And/or | \n",
+ " Comparison Allele(s) or Genotype(s) | \n",
+ " Comparison Metabolizer types | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1448267532 | \n",
+ " Variant Phenotype Annotation | \n",
+ " 27745802 | \n",
+ " Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG. | \n",
+ " 1448267532 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 27745802 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " Bone mineral density compared before and after 1 year of treatment with ivacaftor using dual energy X-ray absorptiometry at the L2-L4 lumbar spine. All patients were pancreatic insufficient. | \n",
+ " Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG. | \n",
+ " AA + AG | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " decreased | \n",
+ " severity of | \n",
+ " Side Effect:bone density | \n",
+ " and | \n",
+ " when treated with | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " GG | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 981755803 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " 1A | \n",
+ " Efficacy | \n",
+ " ivacaftor | \n",
+ " Cystic Fibrosis | \n",
+ " 1449192031 | \n",
+ " Variant Phenotype Annotation | \n",
+ " 28651844 | \n",
+ " Allele A is associated with decreased likelihood of cystic fibrosis pulmonary exacerbation when treated with ivacaftor in people with Cystic Fibrosis. | \n",
+ " 1449192031 | \n",
+ " rs75527207 | \n",
+ " CFTR | \n",
+ " ivacaftor | \n",
+ " 28651844 | \n",
+ " Efficacy | \n",
+ " yes | \n",
+ " G551D allele. Patients receiving ivacaftor treatment had a reduced rate of pulmonary exacerbation events compared to patients receiving a placebo. | \n",
+ " Allele A is associated with decreased likelihood of cystic fibrosis pulmonary exacerbation when treated with ivacaftor in people with Cystic Fibrosis. | \n",
+ " A | \n",
+ " Pediatric | \n",
+ " NaN | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " decreased | \n",
+ " likelihood of | \n",
+ " Disease:cystic fibrosis pulmonary exacerbation | \n",
+ " and | \n",
+ " when treated with | \n",
+ " NaN | \n",
+ " in people with | \n",
+ " Disease:Cystic Fibrosis | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Clinical Annotation ID Variant/Haplotypes Gene Level of Evidence \\\n",
+ "0 981755803 rs75527207 CFTR 1A \n",
+ "1 981755803 rs75527207 CFTR 1A \n",
+ "\n",
+ " Phenotype Category Drug(s) Phenotype(s) Evidence ID \\\n",
+ "0 Efficacy ivacaftor Cystic Fibrosis 1448267532 \n",
+ "1 Efficacy ivacaftor Cystic Fibrosis 1449192031 \n",
+ "\n",
+ " Evidence Type PMID \\\n",
+ "0 Variant Phenotype Annotation 27745802 \n",
+ "1 Variant Phenotype Annotation 28651844 \n",
+ "\n",
+ " Summary \\\n",
+ "0 Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG. \n",
+ "1 Allele A is associated with decreased likelihood of cystic fibrosis pulmonary exacerbation when treated with ivacaftor in people with Cystic Fibrosis. \n",
+ "\n",
+ " Variant Annotation ID Variant/Haplotypes_var_pheno Gene_var_pheno \\\n",
+ "0 1448267532 rs75527207 CFTR \n",
+ "1 1449192031 rs75527207 CFTR \n",
+ "\n",
+ " Drug(s)_var_pheno PMID_var_pheno Phenotype Category_var_pheno Significance \\\n",
+ "0 ivacaftor 27745802 Efficacy yes \n",
+ "1 ivacaftor 28651844 Efficacy yes \n",
+ "\n",
+ " Notes \\\n",
+ "0 Bone mineral density compared before and after 1 year of treatment with ivacaftor using dual energy X-ray absorptiometry at the L2-L4 lumbar spine. All patients were pancreatic insufficient. \n",
+ "1 G551D allele. Patients receiving ivacaftor treatment had a reduced rate of pulmonary exacerbation events compared to patients receiving a placebo. \n",
+ "\n",
+ " Sentence \\\n",
+ "0 Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG. \n",
+ "1 Allele A is associated with decreased likelihood of cystic fibrosis pulmonary exacerbation when treated with ivacaftor in people with Cystic Fibrosis. \n",
+ "\n",
+ " Alleles Specialty Population Metabolizer types isPlural \\\n",
+ "0 AA + AG NaN NaN Is \n",
+ "1 A Pediatric NaN Is \n",
+ "\n",
+ " Is/Is Not associated Direction of effect Side effect/efficacy/other \\\n",
+ "0 Associated with decreased severity of \n",
+ "1 Associated with decreased likelihood of \n",
+ "\n",
+ " Phenotype Multiple phenotypes And/or \\\n",
+ "0 Side Effect:bone density and \n",
+ "1 Disease:cystic fibrosis pulmonary exacerbation and \n",
+ "\n",
+ " When treated with/exposed to/when assayed with Multiple drugs And/or \\\n",
+ "0 when treated with NaN \n",
+ "1 when treated with NaN \n",
+ "\n",
+ " Population types Population Phenotypes or diseases \\\n",
+ "0 in people with Disease:Cystic Fibrosis \n",
+ "1 in people with Disease:Cystic Fibrosis \n",
+ "\n",
+ " Multiple phenotypes or diseases And/or Comparison Allele(s) or Genotype(s) \\\n",
+ "0 NaN GG \n",
+ "1 NaN NaN \n",
+ "\n",
+ " Comparison Metabolizer types \n",
+ "0 NaN \n",
+ "1 NaN "
+ ]
+ },
+ "execution_count": 94,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_981755803_pheno"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "id": "08abfc16-b970-4f45-96c1-5e1584ba9a0e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "28"
+ ]
+ },
+ "execution_count": 93,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Comparing number of PMIDs vs. number of evidence\n",
+ "len(set(df_981755803_drug['PMID']) | set(df_981755803_pheno['PMID']) | set(df_981755803_fa['PMID']))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e5d40be8-65a0-487b-a175-d98aa502aaeb",
+ "metadata": {},
+ "source": [
+ "#### Observations so far\n",
+    "Clinical annotation [981755803](https://www.pharmgkb.org/clinicalAnnotation/981755803) has 30 pieces of supporting evidence:\n",
+ "* 24 variant/drug annotations\n",
+ "* 2 variant/functional assay annotations\n",
+ "* 2 variant/phenotype annotations\n",
+ "* 2 others (drug labels & guidelines, present in another data download so not included here)\n",
+ "\n",
+    "Each variant annotation is associated with a PMID; these are 1:1 (at least in this example).\n",
+ "* We should think about whether we want to preserve the PMID & evidence associations.\n",
+ "\n",
+ "These annotations seem much more specific than the clinical annotations, e.g.\n",
+ "* they distinguish between \"disease\" and \"side effect\" (check how often)\n",
+ "* if there are multiple phenotypes or drugs, they specify whether these should be \"and\"s or \"or\"s\n",
+ "\n",
+ "These annotations are specific to one or more alleles or genotypes, so we will need to associate them accordingly.\n",
+ "\n",
+    "It would be good if we could select the relevant columns from the 3 variant annotation tables and merge them into a unified representation, so we don't have to manage them separately in the pipelines or in the UI.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "85e68502-9717-40bd-989a-e9aa7376879d",
+ "metadata": {},
+ "source": [
+ "## Coverage\n",
+ "\n",
+ "[Top of page](#Table-of-contents)\n",
+ "\n",
+ "How many annotations have evidence, how many have direction of effect specifically"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "id": "57d1856b-6b82-4084-9def-5c95db9cdfef",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5111"
+ ]
+ },
+ "execution_count": 97,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(clinical_annotations)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 129,
+ "id": "23b76684-1ceb-4dd5-b95a-931a57fbcafc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "15129"
+ ]
+ },
+ "execution_count": 129,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Exploded on evidence - average 3 per annotation\n",
+ "len(main_df)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 130,
+ "id": "8d92bd51-114f-4b9f-961e-3afcc9084560",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add all the var annotation tables - this will be a huge mess\n",
+ "main_with_var = pd.merge(main_df, var_drug_ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='left', suffixes=(None, '_var_drug'))\n",
+ "main_with_var = pd.merge(main_with_var, var_pheno_ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='left', suffixes=(None, '_var_pheno'))\n",
+ "main_with_var = pd.merge(main_with_var, var_fa_ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='left', suffixes=(None, '_var_fa'))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 144,
+ "id": "67228cc8-68e9-4265-997d-047a31458c7d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ca_with_var_evidence = set(main_with_var[main_with_var['Sentence'].notna() | main_with_var['Sentence_var_pheno'].notna() | main_with_var['Sentence_var_fa'].notna()][ID_COL_NAME])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 147,
+ "id": "84438d2c-21c2-4411-bbda-742a2f3e94da",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5111"
+ ]
+ },
+ "execution_count": 147,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Every clinical annotation has at least one variant annotation as supporting evidence\n",
+ "len(ca_with_var_evidence)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 205,
+ "id": "3dee0e63-6bfa-4c59-9838-cc129e4aacad",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Evidence from var/drug 2435\n",
+ "Evidence from var/pheno 2958\n",
+ "Evidence from var/fa 418\n"
+ ]
+ }
+ ],
+ "source": [
+ "print('Evidence from var/drug', len(set(main_with_var[main_with_var['Sentence'].notna()][ID_COL_NAME])))\n",
+ "print('Evidence from var/pheno', len(set(main_with_var[main_with_var['Sentence_var_pheno'].notna()][ID_COL_NAME])))\n",
+ "print('Evidence from var/fa', len(set(main_with_var[main_with_var['Sentence_var_fa'].notna()][ID_COL_NAME])))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 196,
+ "id": "38006cda-cedf-44d2-9d72-50ec1baf922e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main_with_var_where_notna(common_col_name):\n",
+ " # Filter main_with_var on non-na columns that are common to all three variant annotation tables\n",
+ " return main_with_var[\n",
+ " main_with_var[common_col_name].notna()\n",
+ " | main_with_var[f'{common_col_name}_var_pheno'].notna()\n",
+ " | main_with_var[f'{common_col_name}_var_fa'].notna()\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 197,
+ "id": "3bea97eb-cbaa-4cc6-873a-33f240140d60",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main_with_var_values_in(common_col_name):\n",
+ " # Return set of values in given column, common to all three variant annotation tables\n",
+ " return (\n",
+ " set(main_with_var[common_col_name]) \n",
+ " | set(main_with_var[f'{common_col_name}_var_pheno']) \n",
+ " | set(main_with_var[f'{common_col_name}_var_fa'])\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 231,
+ "id": "a9814220-d767-4e1e-b3ca-ea27b61a5777",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ca_with_doe_evidence = set(main_with_var_where_notna('Direction of effect')[ID_COL_NAME])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 232,
+ "id": "3be7753e-902e-47ec-985e-e2b08f70158d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "4917"
+ ]
+ },
+ "execution_count": 232,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Most contain some kind of direction of effect info\n",
+ "len(ca_with_doe_evidence)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 230,
+ "id": "05a195e7-42ec-4b26-a8b0-d7aa0f463053",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.9620426531011543"
+ ]
+ },
+ "execution_count": 230,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "4917 / 5111"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 234,
+ "id": "05435327-2590-4018-abdd-e3afe14632a4",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Total 15129\n",
+ "With variant annotation 14658\n",
+ "With PMID 14658\n",
+ "With allele 14248\n",
+ "With comparison allele 12464\n"
+ ]
+ }
+ ],
+ "source": [
+ "print('Total', len(main_with_var)) # i.e. clinical annotations exploded by evidence id\n",
+ "print('With variant annotation', len(main_with_var_where_notna('Variant Annotation ID')))\n",
+ "print('With PMID', len(main_with_var_where_notna('PMID')))\n",
+ "print('With allele', len(main_with_var_where_notna('Alleles')))\n",
+ "print('With comparison allele', len(main_with_var_where_notna('Comparison Allele(s) or Genotype(s)')))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 199,
+ "id": "712f2365-fb6a-4c12-b73f-167756358e48",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "27427"
+ ]
+ },
+ "execution_count": 199,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Suddenly worried about counts\n",
+ "# All variant annotation IDs in all three tables\n",
+ "len(set(var_drug_ann['Variant Annotation ID']) | set(var_fa_ann['Variant Annotation ID']) | set(var_pheno_ann['Variant Annotation ID']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 200,
+ "id": "fbae3775-37f2-42e2-a639-5596bfab2dad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13783"
+ ]
+ },
+ "execution_count": 200,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# All evidence IDs - includes variant annotations and drug labels\n",
+ "len(set(main_df['Evidence ID']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 180,
+ "id": "b55fcd03-ab2d-4cc4-8c84-4c8100abc2ee",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "13778"
+ ]
+ },
+ "execution_count": 180,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "all_var_ann_ids = set(var_drug_ann['Variant Annotation ID']) | set(var_fa_ann['Variant Annotation ID']) | set(var_pheno_ann['Variant Annotation ID'])\n",
+ "all_ev_ids = set(main_df['Evidence ID'])\n",
+ "\n",
+ "# Not all variant annotation evidence is used\n",
+ "len(all_var_ann_ids - all_ev_ids)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "98c401bc-d920-405f-90a1-93016c7ed9ae",
+ "metadata": {},
+ "source": [
+ "#### Observations so far:\n",
+ "* Every clinical annotation has at least one variant annotation as supporting evidence\n",
+ "* Most contain some kind of direction of effect info (i.e. in one of the three tables)\n",
+ " * => coverage is good, assuming we care about all three types of effects\n",
+    "* Selecting any one table covers at most a little over half of the clinical annotations (var/pheno: 2958 of 5111, ~58%)\n",
+ "* Not all variant annotation evidence is included in a clinical annotation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "dc4b1bfd-a8ab-4215-832d-5a6aa177470c",
+ "metadata": {},
+ "source": [
+ "## Direction of effect\n",
+ "\n",
+ "[Top of page](#Table-of-contents)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "01ff8ed9-bdc2-454e-9a68-c9530b9f5de8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Trying to make sense of the columns - output suppressed for brevity\n",
+ "main_with_var.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 133,
+ "id": "126d1762-ae36-4003-989d-c28d96b78e12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "all_var_ann_cols = set(var_drug_ann.columns) | set(var_fa_ann.columns) | set(var_pheno_ann.columns)\n",
+ "common_var_ann_cols = set(var_drug_ann.columns) & set(var_fa_ann.columns) & set(var_pheno_ann.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 134,
+ "id": "86c927dd-2dfc-4957-9afb-50eb7334d213",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'Alleles',\n",
+ " 'Comparison Allele(s) or Genotype(s)',\n",
+ " 'Comparison Metabolizer types',\n",
+ " 'Direction of effect',\n",
+ " 'Drug(s)',\n",
+ " 'Gene',\n",
+ " 'Is/Is Not associated',\n",
+ " 'Metabolizer types',\n",
+ " 'Multiple drugs And/or',\n",
+ " 'Notes',\n",
+ " 'PMID',\n",
+ " 'Phenotype Category',\n",
+ " 'Sentence',\n",
+ " 'Significance',\n",
+ " 'Specialty Population',\n",
+ " 'Variant Annotation ID',\n",
+ " 'Variant/Haplotypes',\n",
+ " 'isPlural'}"
+ ]
+ },
+ "execution_count": 134,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "common_var_ann_cols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 135,
+ "id": "44b41088-2162-4138-a719-1de7e4c3c156",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "unique_var_ann_cols = all_var_ann_cols - common_var_ann_cols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 136,
+ "id": "4f19f44b-d472-4b1b-ae13-1faa8a3ea5b9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# annotate with origin table\n",
+ "annotated_unique_var_ann_cols = {'drug':[], 'fa':[], 'pheno':[]}\n",
+ "for c in unique_var_ann_cols:\n",
+ " if c in var_drug_ann.columns:\n",
+ " annotated_unique_var_ann_cols['drug'].append(c)\n",
+ " if c in var_fa_ann.columns:\n",
+ " annotated_unique_var_ann_cols['fa'].append(c)\n",
+ " if c in var_pheno_ann.columns:\n",
+ " annotated_unique_var_ann_cols['pheno'].append(c)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 137,
+ "id": "888398bc-00ce-45bd-af1b-0053c47ba3c3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'drug': ['Multiple phenotypes or diseases And/or',\n",
+ " 'Population types',\n",
+ " 'Population Phenotypes or diseases',\n",
+ " 'PD/PK terms'],\n",
+ " 'fa': ['Cell type',\n",
+ " 'Functional terms',\n",
+ " 'When treated with/exposed to/when assayed with',\n",
+ " 'Gene/gene product',\n",
+ " 'Assay type'],\n",
+ " 'pheno': ['Multiple phenotypes or diseases And/or',\n",
+ " 'Side effect/efficacy/other',\n",
+ " 'Multiple phenotypes And/or',\n",
+ " 'When treated with/exposed to/when assayed with',\n",
+ " 'Population types',\n",
+ " 'Population Phenotypes or diseases',\n",
+ " 'Phenotype']}"
+ ]
+ },
+ "execution_count": 137,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "annotated_unique_var_ann_cols"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0a3b9cbd-c13e-4e98-831a-aa508fc69d10",
+ "metadata": {},
+ "source": [
+ "#### Sentence breakdown\n",
+ "\n",
+ "[Top of page](#Table-of-contents)\n",
+ "\n",
+ "* Population phenotype always goes with \"multiple phenotypes and/or\" (note functional assay doesn't mention phenotype - I guess there's no multiples in the gene product?)\n",
+ " * where does the main table phenotype come from? OT is overwriting this now but I'm still confused\n",
+ "* Drug (main table) always goes with \"multiple drugs and/or\"\n",
+    "* Comparison alleles in theory (I guess) tell us something about the reference or baseline, though they are not always present\n",
+ "\n",
+ "Sentence examples:\n",
+ "* **drug**: \"Genotypes AA + AG are associated with response to ivacaftor in people with Cystic Fibrosis.\"\n",
+ " * alleles = \"AA + AG\"\n",
+ " * direction of effect = [none]\n",
+ " * pd/pk term = \"response to\"\n",
+ " * drug = \"ivacaftor\"\n",
+ " * population types = \"people\"\n",
+ " * population phenotype = \"cystic fibrosis\"\n",
+ " * comparison alleles/genotypes = [none]\n",
+ "* **fa**: \"Allele A is associated with increased activity of CFTR when treated with ivacaftor in transfected CHO cells.\"\n",
+ " * alleles = \"A\"\n",
+ " * direction of effect = \"increased\"\n",
+ " * functional term = \"activity of\"\n",
+ " * gene/gene product = \"CFTR\"\n",
+ " * when treated with/exposed to/assayed with = \"when treated with\"\n",
+ " * drug = \"ivacaftor\"\n",
+ " * cell type = \"transfected CHO cells\"\n",
+ " * comparison alleles/genotypes = [none]\n",
+ "* **pheno**: \"Genotypes AA + AG is associated with decreased severity of bone density when treated with ivacaftor in people with Cystic Fibrosis as compared to genotype GG.\"\n",
+ " * alleles = \"AA + AG\"\n",
+ " * direction of effect = \"decreased\"\n",
+ " * side effect/efficacy/other = \"severity of\"\n",
+ " * phenotype = \"bone density\" **< note distinction between this and population phenotype**\n",
+ " * when treated with/exposed to/assayed with = \"when treated with\"\n",
+ " * drug = \"ivacaftor\"\n",
+ " * population types = \"people\"\n",
+ " * population phenotype = \"cystic fibrosis\"\n",
+ " * comparison alleles/genotypes = \"GG\"\n",
+ "\n",
+ "For the simplest direction of effect annotation (i.e. not including the population, cell type, etc.), I think we only strictly need the following:\n",
+ "1. direction of effect\n",
+ "2. pd/pk term | functional term | side effect/efficacy/other\n",
+ "3. drug | gene/gene product | phenotype\n",
+ "\n",
+ "This tells us the direction (1) and what the effect is (2&3).\n",
+ "Of course we also need the alleles to associate with the appropriate evidence string, maybe also the comparison alleles/genotypes when present, and the \"is/is not associated\" column so we don't report negative results (unless we want to).\n",
+ "\n",
+ "Maybe also just the origin of the evidence (variant/drug, variant/phenotype, or functional analysis) is useful."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1b212421-daaf-4ffb-bcc4-66a2afb94794",
+ "metadata": {},
+ "source": [
+ "#### Vocabulary\n",
+ "\n",
+ "[Top of page](#Table-of-contents)\n",
+ "\n",
+ "Check some vocabulary - how variable or consistent are the most critical terms, are they using fixed vocab, etc."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 161,
+ "id": "d5a77ac2-d058-426b-8d54-10aefd5023c3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'decreased', 'increased', nan}"
+ ]
+ },
+ "execution_count": 161,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# DoE fixed vocab according to readme\n",
+ "set(main_with_var['Direction of effect']) | set(main_with_var['Direction of effect_var_pheno']) | set(main_with_var['Direction of effect_var_fa'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 158,
+ "id": "faa11da3-07ad-4019-bfd7-589342c435aa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'clearance of',\n",
+ " 'clinical benefit to',\n",
+ " 'concentrations of',\n",
+ " 'discontinuation of',\n",
+ " 'dose of',\n",
+ " 'dose-adjusted trough concentrations of',\n",
+ " 'exposure to',\n",
+ " 'half-life time of',\n",
+ " 'metabolism of',\n",
+ " nan,\n",
+ " 'resistance to',\n",
+ " 'response to',\n",
+ " 'steady-state concentration of',\n",
+ " 'time to response to',\n",
+ " 'trough concentration of'}"
+ ]
+ },
+ "execution_count": 158,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "set(main_with_var['PD/PK terms']) # not limited according to readme"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 159,
+ "id": "61e8c91c-c963-4710-82f1-e6ec9f4c8def",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'activity of',\n",
+ " 'affinity to',\n",
+ " 'catalytic activity of',\n",
+ " 'clearance of',\n",
+ " 'concentrations of',\n",
+ " 'enzyme activity of',\n",
+ " 'expression of',\n",
+ " 'formation of',\n",
+ " 'glucuronidation of',\n",
+ " 'half-life of',\n",
+ " 'inhibition of',\n",
+ " 'metabolism of',\n",
+ " nan,\n",
+ " 'protein stability of',\n",
+ " 'sensitivity to',\n",
+ " 'steady-state level of',\n",
+ " 'sulfation of',\n",
+ " 'transcription of',\n",
+ " 'transport of',\n",
+ " 'uptake of'}"
+ ]
+ },
+ "execution_count": 159,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "set(main_with_var['Functional terms']) # not limited"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 160,
+ "id": "87c61724-3034-413f-b691-6ea8d475ae5d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'age at onset of', 'likelihood of', nan, 'risk of', 'severity of'}"
+ ]
+ },
+ "execution_count": 160,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "set(main_with_var['Side effect/efficacy/other']) # limited"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "da232356-d984-4e35-bbe3-e5811b68d172",
+ "metadata": {},
+ "source": [
+    "Assume the final term (drug | gene/gene product | phenotype) will vary, but hopefully we can also map it to the relevant domain if needed (CHEMBL, EFO, Ensembl). Drug & gene terms look about what you'd expect; as usual, phenotype is the most diverse (see below; the readme explicitly states phenotype is not standardized).\n",
+ "\n",
+ "Otherwise this is a pretty small and consistent set of terms for ca. 15000 rows, though I don't think we can assume the vocab won't grow (except the actual direction word, we should be good there)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 228,
+ "id": "fb68e303-a627-418a-83d0-e1894a382e65",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['PK:differences in exposure to the active metabolite of prasugrel',\n",
+ " 'Disease:Endometrial Neoplasms',\n",
+ " '\"Disease:Epidermal Necrolysis, Toxic\", \"Disease:Stevens-Johnson Syndrome\"',\n",
+ " 'Side Effect:total hemorrhage and major hemorrhage',\n",
+ " 'Other:subjective feelings of intoxication, stimulation, sedation, and happiness',\n",
+ " 'PK:plasma oxymorphone/oxycodone ratio',\n",
+ " 'Disease:Hematologic Diseases',\n",
+ " 'Side Effect:Venous Thrombosis',\n",
+ " 'Efficacy:non-remission',\n",
+ " 'Side Effect:Leukopenia']"
+ ]
+ },
+ "execution_count": 228,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "list(set(main_with_var['Phenotype']))[1:11]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 219,
+ "id": "39f1d19c-48cd-40c4-8e5b-a13e13364272",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1735"
+ ]
+ },
+ "execution_count": 219,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(set(main_with_var['Phenotype']))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 216,
+ "id": "6182da30-8a3e-4f28-8994-17aa028a2f55",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'Disease', 'Efficacy', 'Other', 'PK', 'Side Effect'}"
+ ]
+ },
+ "execution_count": 216,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Dirty attempt to get the prefix - doesn't account for multiples\n",
+ "set(main_with_var['Phenotype'].dropna().apply(lambda p: p.split(':')[0].strip('\"')))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 218,
+ "id": "0965c47c-84f1-4b3b-b164-eab5011b3c94",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1499"
+ ]
+ },
+ "execution_count": 218,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(set(main_with_var['Phenotype'].dropna().apply(lambda p: p.split(':')[1].strip('\"'))))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "49c5775d-3fd8-4ab7-9e41-3cb47923555e",
+ "metadata": {},
+ "source": [
+    "The prefix looks to be fixed and always present, which is nice and honestly kind of surprising actually. The rest I think can come from a body of PharmGKB terms or be filled in freely. We could perhaps map them (with OLS or NLP).\n",
+ "\n",
+ "Same is true for population phenotypes when present:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 226,
+ "id": "b3dbc780-9a76-4b2d-bf11-a090e9f66d7e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'Disease', 'Efficacy', 'Other', 'PK', 'Side Effect'}"
+ ]
+ },
+ "execution_count": 226,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# var_fa does not have the column so I can't use my nice function :(\n",
+ "(\n",
+ " set(main_with_var['Population Phenotypes or diseases'].dropna().apply(lambda p: p.split(':')[0].strip('\"'))) \n",
+ " | set(main_with_var['Population Phenotypes or diseases_var_pheno'].dropna().apply(lambda p: p.split(':')[0].strip('\"'))) \n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "59bcdfc1-98df-4f21-aa2e-487b2d4af3dc",
+ "metadata": {},
+ "source": [
+ "## Alleles and genotypes\n",
+ "\n",
+ "[Top of page](#Table-of-contents)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "423328ff-d104-474f-bc96-153df5945c19",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Visual inspection of alleles - output suppressed for brevity\n",
+ "main_with_var_values_in('Alleles')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a9279f41-dd40-4d15-a553-6ab08f8312b4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "main_with_var_values_in('Comparison Allele(s) or Genotype(s)')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "63924c24-053d-4fc2-806a-4e77fad198b4",
+ "metadata": {},
+ "source": [
+ "Alleles and comparison alleles look relatively consistent with what's in the alleles table:\n",
+ "* SNP genotype `C/T` or `CC` (annoying)\n",
+ "* SNP allele `C`\n",
+ "* indel `GGGGAGCTTTCCCAGAGACCC/del`\n",
+ "* named allele `*17` or `HTTLPR short form (S allele)`\n",
+ "* named genotype `*2/*4`\n",
+ "* combinations of the above delineated with `+`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 203,
+ "id": "589d7070-76d0-4559-bafc-b422646134ca",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['G6PD Canton, Taiwan-Hakka, Gifu-like, Agrigento-like',\n",
+ " 'G6PD A- 202A_376G',\n",
+ " 'G6PD A- 202A_376G, G6PD B (reference)',\n",
+ " 'CYP1A2 high activity',\n",
+ " 'SLC6A4 HTTLPR long form (L allele), SLC6A4 HTTLPR short form (S allele)',\n",
+ " 'SLC6A4 HTTLPR short form (S allele)',\n",
+ " 'CYP2D6 poor metabolizer genotype',\n",
+ " 'CYP1A2 low activity',\n",
+ " 'CYP2A6 poor metabolizer genotype',\n",
+ " 'CYP2D6 ultrarapid metabolizer genotype',\n",
+ " 'CYP2D6 low activity',\n",
+ " 'CYP2A6 low activity',\n",
+ " 'G6PD B (reference), G6PD Mediterranean Haplotype',\n",
+ " 'CYP2C19 poor metabolizer phenotype',\n",
+ " 'CYP2C19 poor metabolizers',\n",
+ " 'CYP2C19 poor metabolizer genotype',\n",
+ " 'CYP2D6 poor metabolizer phenotype',\n",
+ " 'CYP3A4 low activity',\n",
+ " 'TPMT intermediate metabolizer phenotype',\n",
+ " 'G6PD deficiency',\n",
+ " 'NAT2 slow acetylator',\n",
+ " 'CYP2D6 ultrarapid metabolizer phenotype',\n",
+ " 'CYP2D6 poor and ultrarapid metabolizers',\n",
+ " 'CYP2D6 poor metabolizer and intermediate metabolizer genotypes',\n",
+ " 'CYP2D6 normal metabolizer and ultrarapid metabolizer genotypes',\n",
+ " 'CYP2C19 normal metabolizers',\n",
+ " 'CYP2C19 poor metabolizer and intermediate metabolizer genotypes',\n",
+ " 'SLC6A4 HTTLPR short form (S allele), SLC6A4 L allele-rs25531C, SLC6A4 L allele-rs25531T',\n",
+ " 'CYP2C9 poor metabolizer',\n",
+ " 'GSTT1 non-null, GSTT1 null',\n",
+ " 'CYP2A6 intermediate activity',\n",
+ " 'G6PD B (reference), G6PD Mediterranean, Dallas, Panama, Sassari, Cagliari, Birmingham',\n",
+ " 'G6PD A- 202A_376G, G6PD B (reference), G6PD Mediterranean, Dallas, Panama, Sassari, Cagliari, Birmingham',\n",
+ " 'GSTM1 non-null, GSTM1 null',\n",
+ " 'G6PD A- 202A_376G, G6PD B (reference), G6PD Mediterranean Haplotype, G6PD Mediterranean, Dallas, Panama, Sassari, Cagliari, Birmingham',\n",
+ " 'TPMT intermediate metabolizer genotype',\n",
+ " 'CYP2C19 intermediate metabolizer',\n",
+ " 'G6PD Mediterranean, Dallas, Panama, Sassari, Cagliari, Birmingham',\n",
+ " 'CYP2D6 normal metabolizer genotype',\n",
+ " 'G6PD B (reference), G6PD Mediterranean Haplotype, G6PD Mediterranean, Dallas, Panama, Sassari, Cagliari, Birmingham',\n",
+ " 'CYP2D6 poor metabolizer',\n",
+ " 'GSTT1 null',\n",
+ " 'G6PD B (reference), G6PD Canton, Taiwan-Hakka, Gifu-like, Agrigento-like',\n",
+ " 'TPMT poor metabolizer phenotype',\n",
+ " 'CYP2D6 poor metabolizers',\n",
+ " 'CYP2C19 normal metabolizer',\n",
+ " 'CYP2C19 intermediate metabolizers',\n",
+ " 'CYP2D6 normal metabolizers',\n",
+ " 'GSTM1 null',\n",
+ " 'NAT2 intermediate acetylator',\n",
+ " 'TPMT poor metabolizers',\n",
+ " 'CYP2D6 intermediate metabolizers',\n",
+ " 'GSTM1 non-null',\n",
+ " 'TPMT intermediate metabolizers',\n",
+ " 'SLC6A4 L allele-rs25531T',\n",
+ " 'CYP2C19 poor metabolizer',\n",
+ " 'CYP2D6 poor metabolizers and intermediate metabolizers']"
+ ]
+ },
+ "execution_count": 203,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "variant_haps = main_with_var_values_in('Variant/Haplotypes')\n",
+ "\n",
+ "[v for v in variant_haps if pd.notna(v) and not (v.startswith('rs') or '*' in v)]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0beac63b-2343-4410-a8ac-24896e140dd0",
+ "metadata": {},
+ "source": [
+    "Some of these are just named (non-star) alleles, but things like \"CYP2A6 poor metabolizer genotype\" are where the comparison metabolizer gets used as opposed to comparison alleles.\n",
+ "\n",
+ "Not sure what to do about these - we can't easily associate them with an allele or genotype, only with the clinical annotation as a whole.\n",
+ "\n",
+    "Here's [one example](https://www.pharmgkb.org/clinicalAnnotation/1139506787) - clinical annotation has many haplotype-level annotations, but the variant annotation is only given for \"poor metabolizer genotype\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 240,
+ "id": "2cb28399-d5e8-4e41-9d66-584c0dc20fd1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def main_with_var_where_equals(common_col_name, value):\n",
+ " # Filter main_with_var on columns = value that are common to all three variant annotation tables\n",
+ " return main_with_var[\n",
+ " (main_with_var[common_col_name] == value)\n",
+ " | (main_with_var[f'{common_col_name}_var_pheno'] == value)\n",
+ " | (main_with_var[f'{common_col_name}_var_fa'] == value)\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 243,
+ "id": "dc941094-468a-409e-b099-895d9935bb52",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Clinical Annotation ID | \n",
+ " Variant/Haplotypes | \n",
+ " Gene | \n",
+ " Level of Evidence | \n",
+ " Phenotype Category | \n",
+ " Drug(s) | \n",
+ " Phenotype(s) | \n",
+ " Evidence ID | \n",
+ " Evidence Type | \n",
+ " PMID | \n",
+ " Summary | \n",
+ " Variant Annotation ID_var_pheno | \n",
+ " Variant/Haplotypes_var_pheno | \n",
+ " Gene_var_pheno | \n",
+ " Drug(s)_var_pheno | \n",
+ " PMID_var_pheno | \n",
+ " Phenotype Category_var_pheno | \n",
+ " Significance_var_pheno | \n",
+ " Notes_var_pheno | \n",
+ " Sentence_var_pheno | \n",
+ " Alleles_var_pheno | \n",
+ " Specialty Population_var_pheno | \n",
+ " Metabolizer types_var_pheno | \n",
+ " isPlural_var_pheno | \n",
+ " Is/Is Not associated_var_pheno | \n",
+ " Direction of effect_var_pheno | \n",
+ " Side effect/efficacy/other | \n",
+ " Phenotype | \n",
+ " Multiple phenotypes And/or | \n",
+ " When treated with/exposed to/when assayed with | \n",
+ " Multiple drugs And/or_var_pheno | \n",
+ " Population types_var_pheno | \n",
+ " Population Phenotypes or diseases_var_pheno | \n",
+ " Multiple phenotypes or diseases And/or_var_pheno | \n",
+ " Comparison Allele(s) or Genotype(s)_var_pheno | \n",
+ " Comparison Metabolizer types_var_pheno | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 10622 | \n",
+ " 1139506787 | \n",
+ " CYP2A6*1, CYP2A6*1x2, CYP2A6*2, CYP2A6*4, CYP2A6*7, CYP2A6*9, CYP2A6*10, CYP2A6*11, CYP2A6*12, CYP2A6*13, CYP2A6*14, CYP2A6*15, CYP2A6*17, CYP2A6*19, CYP2A6*20, CYP2A6*23, CYP2A6*24, CYP2A6*25, CYP2A6*26, CYP2A6*27, CYP2A6*28, CYP2A6*35, CYP2A6*38, CYP2A6*39, CYP2A6*41, CYP2A6*46, CYP2A6*55 | \n",
+ " CYP2A6 | \n",
+ " 1B | \n",
+ " Metabolism/PK | \n",
+ " nicotine | \n",
+ " Tobacco Use Disorder | \n",
+ " 1183689160 | \n",
+ " Variant Phenotype Annotation | \n",
+ " 23371292 | \n",
+ " CYP2A6 poor metabolizer is associated with increased ratio of cotinine formation to removal when exposed to nicotine in nonsmokers as compared to CYP2A6 normal metabolizer. | \n",
+ " 1183689160 | \n",
+ " CYP2A6 poor metabolizer genotype | \n",
+ " CYP2A6 | \n",
+ " nicotine | \n",
+ " 23371292 | \n",
+ " Metabolism/PK | \n",
+ " yes | \n",
+ " In CYP2A6 reduced metabolizers, cotinine formation was altered less than was cotinine removal as compared to normal metabolizers. Ratios of cotinine formation to removal were 1.31 for reduced metabolizers and 1.12 for normal metabolizers . Reduced metabolizers were defined as subjects with one or two copies of *2,*4, *7,*9,*10,*12,*17,*35. | \n",
+ " CYP2A6 poor metabolizer is associated with increased ratio of cotinine formation to removal when exposed to nicotine in nonsmokers as compared to CYP2A6 normal metabolizer. | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " poor metabolizer | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " increased | \n",
+ " NaN | \n",
+ " PK:ratio of cotinine formation to removal | \n",
+ " NaN | \n",
+ " when exposed to | \n",
+ " NaN | \n",
+ " in | \n",
+ " Other:nonsmokers | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " normal metabolizer | \n",
+ "
\n",
+ " \n",
+ " 10623 | \n",
+ " 1139506787 | \n",
+ " CYP2A6*1, CYP2A6*1x2, CYP2A6*2, CYP2A6*4, CYP2A6*7, CYP2A6*9, CYP2A6*10, CYP2A6*11, CYP2A6*12, CYP2A6*13, CYP2A6*14, CYP2A6*15, CYP2A6*17, CYP2A6*19, CYP2A6*20, CYP2A6*23, CYP2A6*24, CYP2A6*25, CYP2A6*26, CYP2A6*27, CYP2A6*28, CYP2A6*35, CYP2A6*38, CYP2A6*39, CYP2A6*41, CYP2A6*46, CYP2A6*55 | \n",
+ " CYP2A6 | \n",
+ " 1B | \n",
+ " Metabolism/PK | \n",
+ " nicotine | \n",
+ " Tobacco Use Disorder | \n",
+ " 1183689165 | \n",
+ " Variant Phenotype Annotation | \n",
+ " 23371292 | \n",
+ " CYP2A6 poor metabolizer is associated with decreased ratio of plasma cotinine to urinary TNE when exposed to nicotine in smokers as compared to CYP2A6 normal metabolizer. | \n",
+ " 1183689165 | \n",
+ " CYP2A6 poor metabolizer genotype | \n",
+ " CYP2A6 | \n",
+ " nicotine | \n",
+ " 23371292 | \n",
+ " Metabolism/PK | \n",
+ " yes | \n",
+ " In CYP2A6 reduced metabolizers, the slope between urinary TNE (a measurement of tobacco exposure) and plasma cotinine was significantly lower as compared to normal metabolizers. Reduced metabolizers were defined as subjects with one or two copies of *2,*4, *7,*9,*10,*12,*17,*35. | \n",
+ " CYP2A6 poor metabolizer is associated with decreased ratio of plasma cotinine to urinary TNE when exposed to nicotine in smokers as compared to CYP2A6 normal metabolizer. | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " poor metabolizer | \n",
+ " Is | \n",
+ " Associated with | \n",
+ " decreased | \n",
+ " NaN | \n",
+ " PK:ratio of plasma cotinine to urinary TNE | \n",
+ " NaN | \n",
+ " when exposed to | \n",
+ " NaN | \n",
+ " in | \n",
+ " Other:smokers | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " normal metabolizer | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Clinical Annotation ID \\\n",
+ "10622 1139506787 \n",
+ "10623 1139506787 \n",
+ "\n",
+ " Variant/Haplotypes \\\n",
+ "10622 CYP2A6*1, CYP2A6*1x2, CYP2A6*2, CYP2A6*4, CYP2A6*7, CYP2A6*9, CYP2A6*10, CYP2A6*11, CYP2A6*12, CYP2A6*13, CYP2A6*14, CYP2A6*15, CYP2A6*17, CYP2A6*19, CYP2A6*20, CYP2A6*23, CYP2A6*24, CYP2A6*25, CYP2A6*26, CYP2A6*27, CYP2A6*28, CYP2A6*35, CYP2A6*38, CYP2A6*39, CYP2A6*41, CYP2A6*46, CYP2A6*55 \n",
+ "10623 CYP2A6*1, CYP2A6*1x2, CYP2A6*2, CYP2A6*4, CYP2A6*7, CYP2A6*9, CYP2A6*10, CYP2A6*11, CYP2A6*12, CYP2A6*13, CYP2A6*14, CYP2A6*15, CYP2A6*17, CYP2A6*19, CYP2A6*20, CYP2A6*23, CYP2A6*24, CYP2A6*25, CYP2A6*26, CYP2A6*27, CYP2A6*28, CYP2A6*35, CYP2A6*38, CYP2A6*39, CYP2A6*41, CYP2A6*46, CYP2A6*55 \n",
+ "\n",
+ " Gene Level of Evidence Phenotype Category Drug(s) \\\n",
+ "10622 CYP2A6 1B Metabolism/PK nicotine \n",
+ "10623 CYP2A6 1B Metabolism/PK nicotine \n",
+ "\n",
+ " Phenotype(s) Evidence ID Evidence Type \\\n",
+ "10622 Tobacco Use Disorder 1183689160 Variant Phenotype Annotation \n",
+ "10623 Tobacco Use Disorder 1183689165 Variant Phenotype Annotation \n",
+ "\n",
+ " PMID \\\n",
+ "10622 23371292 \n",
+ "10623 23371292 \n",
+ "\n",
+ " Summary \\\n",
+ "10622 CYP2A6 poor metabolizer is associated with increased ratio of cotinine formation to removal when exposed to nicotine in nonsmokers as compared to CYP2A6 normal metabolizer. \n",
+ "10623 CYP2A6 poor metabolizer is associated with decreased ratio of plasma cotinine to urinary TNE when exposed to nicotine in smokers as compared to CYP2A6 normal metabolizer. \n",
+ "\n",
+ " Variant Annotation ID_var_pheno Variant/Haplotypes_var_pheno \\\n",
+ "10622 1183689160 CYP2A6 poor metabolizer genotype \n",
+ "10623 1183689165 CYP2A6 poor metabolizer genotype \n",
+ "\n",
+ " Gene_var_pheno Drug(s)_var_pheno PMID_var_pheno \\\n",
+ "10622 CYP2A6 nicotine 23371292 \n",
+ "10623 CYP2A6 nicotine 23371292 \n",
+ "\n",
+ " Phenotype Category_var_pheno Significance_var_pheno \\\n",
+ "10622 Metabolism/PK yes \n",
+ "10623 Metabolism/PK yes \n",
+ "\n",
+ " Notes_var_pheno \\\n",
+ "10622 In CYP2A6 reduced metabolizers, cotinine formation was altered less than was cotinine removal as compared to normal metabolizers. Ratios of cotinine formation to removal were 1.31 for reduced metabolizers and 1.12 for normal metabolizers . Reduced metabolizers were defined as subjects with one or two copies of *2,*4, *7,*9,*10,*12,*17,*35. \n",
+ "10623 In CYP2A6 reduced metabolizers, the slope between urinary TNE (a measurement of tobacco exposure) and plasma cotinine was significantly lower as compared to normal metabolizers. Reduced metabolizers were defined as subjects with one or two copies of *2,*4, *7,*9,*10,*12,*17,*35. \n",
+ "\n",
+ " Sentence_var_pheno \\\n",
+ "10622 CYP2A6 poor metabolizer is associated with increased ratio of cotinine formation to removal when exposed to nicotine in nonsmokers as compared to CYP2A6 normal metabolizer. \n",
+ "10623 CYP2A6 poor metabolizer is associated with decreased ratio of plasma cotinine to urinary TNE when exposed to nicotine in smokers as compared to CYP2A6 normal metabolizer. \n",
+ "\n",
+ " Alleles_var_pheno Specialty Population_var_pheno \\\n",
+ "10622 NaN NaN \n",
+ "10623 NaN NaN \n",
+ "\n",
+ " Metabolizer types_var_pheno isPlural_var_pheno \\\n",
+ "10622 poor metabolizer Is \n",
+ "10623 poor metabolizer Is \n",
+ "\n",
+ " Is/Is Not associated_var_pheno Direction of effect_var_pheno \\\n",
+ "10622 Associated with increased \n",
+ "10623 Associated with decreased \n",
+ "\n",
+ " Side effect/efficacy/other Phenotype \\\n",
+ "10622 NaN PK:ratio of cotinine formation to removal \n",
+ "10623 NaN PK:ratio of plasma cotinine to urinary TNE \n",
+ "\n",
+ " Multiple phenotypes And/or \\\n",
+ "10622 NaN \n",
+ "10623 NaN \n",
+ "\n",
+ " When treated with/exposed to/when assayed with \\\n",
+ "10622 when exposed to \n",
+ "10623 when exposed to \n",
+ "\n",
+ " Multiple drugs And/or_var_pheno Population types_var_pheno \\\n",
+ "10622 NaN in \n",
+ "10623 NaN in \n",
+ "\n",
+ " Population Phenotypes or diseases_var_pheno \\\n",
+ "10622 Other:nonsmokers \n",
+ "10623 Other:smokers \n",
+ "\n",
+ " Multiple phenotypes or diseases And/or_var_pheno \\\n",
+ "10622 NaN \n",
+ "10623 NaN \n",
+ "\n",
+ " Comparison Allele(s) or Genotype(s)_var_pheno \\\n",
+ "10622 NaN \n",
+ "10623 NaN \n",
+ "\n",
+ " Comparison Metabolizer types_var_pheno \n",
+ "10622 normal metabolizer \n",
+ "10623 normal metabolizer "
+ ]
+ },
+ "execution_count": 243,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Columns to display, in order: clinical annotation and evidence fields first, then the '_var_pheno' fields\n",
+ "shown_columns = [\n",
+ "    'Clinical Annotation ID', 'Variant/Haplotypes', 'Gene',\n",
+ "    'Level of Evidence', 'Phenotype Category', 'Drug(s)', 'Phenotype(s)',\n",
+ "    'Evidence ID', 'Evidence Type', 'PMID', 'Summary',\n",
+ "    'Variant Annotation ID_var_pheno', 'Variant/Haplotypes_var_pheno',\n",
+ "    'Gene_var_pheno', 'Drug(s)_var_pheno', 'PMID_var_pheno',\n",
+ "    'Phenotype Category_var_pheno', 'Significance_var_pheno',\n",
+ "    'Notes_var_pheno', 'Sentence_var_pheno', 'Alleles_var_pheno',\n",
+ "    'Specialty Population_var_pheno', 'Metabolizer types_var_pheno',\n",
+ "    'isPlural_var_pheno', 'Is/Is Not associated_var_pheno',\n",
+ "    'Direction of effect_var_pheno', 'Side effect/efficacy/other',\n",
+ "    'Phenotype', 'Multiple phenotypes And/or',\n",
+ "    'When treated with/exposed to/when assayed with',\n",
+ "    'Multiple drugs And/or_var_pheno', 'Population types_var_pheno',\n",
+ "    'Population Phenotypes or diseases_var_pheno', 'Multiple phenotypes or diseases And/or_var_pheno',\n",
+ "    'Comparison Allele(s) or Genotype(s)_var_pheno', 'Comparison Metabolizer types_var_pheno'\n",
+ "]\n",
+ "main_with_var_where_equals('Variant/Haplotypes', 'CYP2A6 poor metabolizer genotype')[shown_columns]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "daf324bb-3f75-46db-be7c-1cd8ea561964",
+ "metadata": {},
+ "source": [
+ "## Bonus material\n",
+ "\n",
+ "[Top of page](#Table-of-contents)\n",
+ "\n",
+ "Things I thought about but haven't checked yet:\n",
+ "* How many so-called \"alleles\" are actually these metabolizer terms?\n",
+ " * might inform whether we need to associate via something other than allele, if many important annotations fall under this category\n",
+ "* Do we have to manage contradictory information for a single clinical annotation or even for a single allele/genotype?\n",
+ " * i.e. one study says genotype AA increases X, another says it decreases X, another says it decreases some other Y...\n",
+ " * maybe also check whether this occurs in level 1/2 evidence especially\n",
+ " * informs data structure - i.e. do fields need to be lists or strings\n",
+ "* How do multiple phenotypes, genes and drugs at the variant annotation level get aggregated at the clinical annotation level?\n",
+ " * [981755803](https://www.pharmgkb.org/clinicalAnnotation/981755803) indicates they do _not_ include all \"phenotype\" and \"population phenotype\" for all variant annotations at the clinical annotation level\n",
+ " * similarly interested in how they derive the clinical annotation sentence from all the variant annotation sentences, though it's arguably not important for our automated processing\n",
+ " \n",
+ "I also think there are at least 2 additional issues (not relating to direction of effect) that we can explore using these variant annotation tables, namely:\n",
+ "* Using the \"and/or\" column to clearly delineate drug combinations vs. drugs that are just being annotated together\n",
+ "* Using the additional phenotype annotations (side effect etc.) to disambiguate or supplement the phenotype information we use from the clinical annotation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "72471a5a-8c7f-4d4c-ae6f-92fa86c13d34",
+ "metadata": {},
+ "source": [
+ "## Post-meeting\n",
+ "\n",
+ "* Get a few representative (?!) examples of annotations\n",
+ "* Join with all variant evidence _and_ all clinical_alleles\n",
+ "* Dump to CSV"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 285,
+ "id": "442da098-f2a2-4436-89a0-e7a4760aad7d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Build a clean table showing everything: clinical annotations left-joined on ID_COL_NAME to evidence, then to alleles\n",
+ "annotations_with_evidence = clinical_annotations.merge(clinical_ann_evidence, how='left', on=ID_COL_NAME)\n",
+ "complete_df = annotations_with_evidence.merge(clinical_ann_alleles, how='left', on=ID_COL_NAME)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 286,
+ "id": "dc9116ed-cf4d-4c01-9a2d-f6189a77ee0e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def get_annotation_tables_for_ids(ca_ids):\n",
+ "    \"\"\"Return a 3-tuple of frames: complete_df rows for ca_ids inner-joined to var_drug_ann, var_pheno_ann and var_fa_ann.\"\"\"\n",
+ "    wanted_ids = {str(ca_id) for ca_id in ca_ids}  # IDs are matched as strings; `ca_id` avoids shadowing builtin `id`\n",
+ "    df = complete_df[complete_df[ID_COL_NAME].isin(wanted_ids)]\n",
+ "    variant_tables = ((var_drug_ann, '_var_drug'), (var_pheno_ann, '_var_pheno'), (var_fa_ann, '_var_fa'))\n",
+ "    return tuple(pd.merge(df, ann, left_on='Evidence ID', right_on='Variant Annotation ID', how='inner', suffixes=(None, suffix)) for ann, suffix in variant_tables)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 276,
+ "id": "86bab485-ccf8-426b-9565-01fe35058871",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "example_ca_ids = [981755803, 1139506787, 1183888969, 1184514050, 981419266]\n",
+ "\n",
+ "# Dump each joined table for the example annotations to its own CSV under data_dir (index omitted)\n",
+ "d, p, f = get_annotation_tables_for_ids(example_ca_ids)\n",
+ "for table_name, table_df in (('drug', d), ('pheno', p), ('func', f)):\n",
+ "    table_df.to_csv(f'{data_dir}/example_{table_name}.csv', index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b4cee848-abf7-422d-9d64-c3c4f66f5242",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/requirements.txt b/requirements.txt
index 552658c..4c30a0e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,6 +2,6 @@ jsonschema==3.2.0
numpy==1.24.3
pandas==1.5.3
pytest==7.2.2
-requests==2.31.0
+requests==2.32.0
retry==0.9.2
cmat @ git+https://github.com/EBIvariation/eva-opentargets.git#egg=cmat
\ No newline at end of file