-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #9 from apriltuesday/EVA-3248
Map to Ensembl gene IDs and CHEBI IDs
- Loading branch information
Showing
10 changed files
with
314 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
import logging | ||
import os | ||
from functools import lru_cache | ||
|
||
import requests | ||
from requests import RequestException | ||
from retry import retry | ||
|
||
# Install a default handler on the root logger (a no-op if one is already configured)
# so that this module's messages are emitted even when the caller has not set up logging.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Root URL of the EMBL-EBI Ontology Lookup Service (OLS) REST API; endpoints are appended to it.
OLS_API_ROOT = 'https://www.ebi.ac.uk/ols/api'
|
||
|
||
@lru_cache
@retry(exceptions=(ConnectionError, RequestException), tries=4, delay=2, backoff=1.2, jitter=(1, 3))
def get_chebi_iri(drug_name):
    """
    Look up the CHEBI IRI for a drug name using the OLS search endpoint.

    Only results whose label equals drug_name (ignoring case) are considered, and an IRI is returned only
    when exactly one distinct IRI matches. Return values, including None, are cached by lru_cache, so
    repeated lookups of the same name do not hit the network again.

    :param drug_name: name of the drug to search for
    :return: IRI of the unique exact match, or None if there is no exact match or it is ambiguous
    :raises requests.RequestException: if the request keeps failing after all retry attempts
    """
    # Build the URL by plain string formatting: os.path.join would insert the platform's path separator
    # (a backslash on Windows), which is wrong for URLs.
    chebi_search_url = f'{OLS_API_ROOT}/search'
    # Passing the query through params lets requests URL-encode the drug name, so names containing
    # '&', '#', '+' or spaces cannot corrupt the query string. The timeout keeps a stalled connection
    # from blocking forever; a timeout raises a RequestException and is therefore retried.
    response = requests.get(chebi_search_url, params={'ontology': 'chebi', 'q': drug_name}, timeout=30)
    response.raise_for_status()
    data = response.json()
    if 'response' in data:
        wanted_label = drug_name.lower()
        # Check that we've found the drug exactly (strict case-insensitive string match)
        candidates = {
            result['iri']
            for result in data['response']['docs']
            if result['label'].lower() == wanted_label
        }
        # Only return a result if we can find it unambiguously
        if len(candidates) == 1:
            return candidates.pop()
    logger.warning(f'Could not find a CHEBI IRI for {drug_name}')
    return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import numpy as np | ||
|
||
|
||
def none_to_nan(x):
    """
    Replace None with NaN, leaving every other value untouched.

    :param x: any value
    :return: np.nan if x is None, otherwise x itself
    """
    return np.nan if x is None else x
|
||
|
||
def explode_column(df, source_col, target_col, sep=';'): | ||
""" | ||
Splits a string-valued column in dataframe and explodes on the values, storing them in the specified target column. | ||
Any white space around the separator will be stripped. | ||
:param df: Pandas dataframe | ||
:param source_col: name of column in df to split | ||
:param target_col: destination column name for exploded values | ||
:param sep: string separator to split source_col by (default ';') | ||
:return: dataframe with target_col added | ||
""" | ||
split_cols = df.assign(**{target_col: df[source_col].str.split(sep)}).explode(target_col).reset_index(drop=True) | ||
split_cols[target_col] = split_cols[target_col].map(lambda x: x.strip()) | ||
return split_cols |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
numpy==1.24.3 | ||
pandas==1.5.3 | ||
pytest==7.2.2 | ||
requests==2.28.2 | ||
retry==0.9.2 | ||
retry==0.9.2 | ||
cmat @ git+https://github.com/apriltuesday/eva-opentargets.git@refactor#egg=cmat |
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.