Skip to content

Commit

Permalink
Merge pull request #32 from monarch-initiative/sssom-mappings
Browse files Browse the repository at this point in the history
Implement proper SSSOM validation
  • Loading branch information
glass-ships authored Nov 13, 2023
2 parents f1882d0 + f589a59 commit e9582ff
Show file tree
Hide file tree
Showing 7 changed files with 1,275 additions and 19 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/qc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ jobs:
DEFAULT_BRANCH: main
run: pip install --upgrade pip && pip install -U sssom
- uses: actions/checkout@v2
- name: Prepare mappings
env:
DEFAULT_BRANCH: main
run: make RUN="" mappings
- name: Run Mapping QC checks
env:
DEFAULT_BRANCH: main
Expand Down
14 changes: 14 additions & 0 deletions metadata/gene_mappings.sssom.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# SSSOM mapping-set metadata for the Monarch gene mappings.
# Supplied to `sssom parse` via `-m` when building gene_mappings.sssom.tsv.
curie_map:
  # CURIE prefix -> URI prefix used to expand identifiers in the mapping set.
  # Entries must be nested under `curie_map`; at top level they would be
  # read as unrelated metadata keys and the prefix map would be empty.
  owl: http://www.w3.org/2002/07/owl#
  Xenbase: https://www.xenbase.org/entry/
  FB: https://flybase.org/reports/
  RGD: http://rgd.mcw.edu/rgdweb/report/gene/main.html?id=
  HGNC: http://identifiers.org/hgnc/
  WB: https://www.wormbase.org/get?name=
  # Quoted: an unquoted value ending in ':' would be read as a mapping indicator.
  OMIM: "https://omim.org/MIM:"
  UniProtKB: http://purl.uniprot.org/uniprot/
  ZFIN: http://zfin.org/
license: https://creativecommons.org/licenses/by/4.0/
mapping_set_description: The monarch gene mappings, based on HGNC and ENSEMBL base mappings.
mapping_set_id: https://data.monarchinitiative.org/mappings/gene_mappings.sssom.tsv
# Upstream file this mapping set is derived from (the gene mapping download,
# not the Biomappings file used by the mesh/chebi mapping set).
mapping_set_source: http://data.monarchinitiative.org/monarch-gene-mapping/latest/gene_mappings.tsv
6 changes: 6 additions & 0 deletions metadata/mesh_chebi_biomappings.sssom.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# SSSOM mapping-set metadata for the MeSH-to-ChEBI subset of Biomappings.
# Supplied to `sssom parse` via `-m` when building mesh_chebi_biomappings.sssom.tsv.
curie_map:
  # Must be nested under `curie_map`; at top level it would be read as an
  # unrelated metadata key and the prefix map would be empty.
  MESH: http://id.nlm.nih.gov/mesh/
license: https://creativecommons.org/licenses/by/4.0/
mapping_set_description: A subset of the biomappings containing mesh to chebi associations.
mapping_set_id: https://data.monarchinitiative.org/mappings/mesh_chebi_biomappings.sssom.tsv
mapping_set_source: https://w3id.org/biopragmatics/biomappings/sssom/biomappings.sssom.tsv
17 changes: 13 additions & 4 deletions monarch_mapping_commons.Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,27 @@ MAPPING_DIR = mappings
# Working directories referenced by the rules below (relative paths).
SCRIPT_DIR   := scripts
SRC_DIR      := sources
TMP_DIR      := tmp
METADATA_DIR := metadata

# Prefix for Python tooling invocations. Override on the command line
# (e.g. `make RUN="" <target>`) to run the tools without a wrapper.
RUN := poetry run


# Create any of the working directories on demand.
# `$@` is whichever directory target is being built; `-p` makes this a no-op
# when the directory already exists.
$(MAPPING_DIR)/ $(SCRIPT_DIR)/ $(SRC_DIR)/ $(TMP_DIR)/:
	mkdir -p $@

# Download the MONDO SSSOM mapping set.
# The file is fetched under a scratch name and only moved into place once
# wget has succeeded, so a failed or interrupted download never leaves an
# empty/truncated target that Make would treat as up to date.
$(MAPPING_DIR)/mondo.sssom.tsv:
	mkdir -p $(MAPPING_DIR) $(TMP_DIR)
	wget http://purl.obolibrary.org/obo/mondo/mappings/mondo.sssom.tsv -O $@.tmp
	mv $@.tmp $@

# Build the mesh/chebi Biomappings mapping set:
#   1. download the full Biomappings SSSOM file into $(TMP_DIR)
#   2. run process_biomappings.py on it, writing a scratch TSV in $(TMP_DIR)
#   3. run `sssom parse` with the metadata file from $(METADATA_DIR); only this
#      step writes the final target.
# Both tools go through $(RUN) so they resolve inside the project environment
# by default and still work when invoked with RUN="".
$(MAPPING_DIR)/mesh_chebi_biomappings.sssom.tsv:
	mkdir -p $(MAPPING_DIR) $(TMP_DIR)
	wget https://raw.githubusercontent.com/biopragmatics/biomappings/master/docs/_data/sssom/biomappings.sssom.tsv -O $(TMP_DIR)/biomappings.sssom.tsv
	$(RUN) python3 $(SCRIPT_DIR)/process_biomappings.py --input $(TMP_DIR)/biomappings.sssom.tsv --output $(TMP_DIR)/mesh_chebi_biomappings.sssom.tsv
	$(RUN) sssom parse $(TMP_DIR)/mesh_chebi_biomappings.sssom.tsv -m $(METADATA_DIR)/mesh_chebi_biomappings.sssom.yml --prefix-map-mode merged -o $@

# Build the gene mapping set:
#   1. download the raw gene mappings into $(TMP_DIR) (never directly into the
#      target, so the target only ever holds parsed output)
#   2. drop rows containing "<NA>" and rows containing ";"
#      (see https://github.com/monarch-initiative/monarch-mapping-commons/issues/33)
#   3. run `sssom parse` with the metadata file from $(METADATA_DIR), writing
#      the final target.
# Each grep writes to a scratch file and is moved over its input only on
# success. `sssom` goes through $(RUN) so it resolves inside the project
# environment by default and still works when invoked with RUN="".
$(MAPPING_DIR)/gene_mappings.sssom.tsv:
	mkdir -p $(MAPPING_DIR) $(TMP_DIR)
	wget http://data.monarchinitiative.org/monarch-gene-mapping/latest/gene_mappings.tsv -O $(TMP_DIR)/gene_mappings.sssom.tsv
	grep -v "<NA>" $(TMP_DIR)/gene_mappings.sssom.tsv > $@.tmp && mv $@.tmp $(TMP_DIR)/gene_mappings.sssom.tsv
	grep -v ";" $(TMP_DIR)/gene_mappings.sssom.tsv > $@.tmp && mv $@.tmp $(TMP_DIR)/gene_mappings.sssom.tsv
	$(RUN) sssom parse $(TMP_DIR)/gene_mappings.sssom.tsv -m $(METADATA_DIR)/gene_mappings.sssom.yml --prefix-map-mode merged -o $@
1,246 changes: 1,234 additions & 12 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ cruft = "^2.15.0"
pandas = "^2.1.1"
curies = "^0.6.4"
pytest = "^7.4.2"
sssom = "^0.3.41"


[build-system]
Expand Down
6 changes: 3 additions & 3 deletions registry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ mapping_set_references:
local_id: mondo

# Biomappings for CTD
- mapping_set_id: http://w3id.org/sssom/commons/monarch/biomappings.sssom.tsv
# mapping_set_group: disease_mappings
local_name: biomappings.sssom.tsv
- mapping_set_id: http://w3id.org/sssom/commons/monarch/mesh_chebi_biomappings.sssom.tsv
mapping_set_group: chemical_mappings
local_name: mesh_chebi_biomappings.sssom.tsv
mirror_from: https://raw.githubusercontent.com/biopragmatics/biomappings/master/docs/_data/sssom/biomappings.sssom.tsv
mapping_set_confidence: "0.9"

Expand Down

0 comments on commit e9582ff

Please sign in to comment.