From e8dc36fcaf0375da1c653918268a5dc7747e6b36 Mon Sep 17 00:00:00 2001 From: Kevin Schaper Date: Thu, 11 Jul 2024 13:23:21 -0700 Subject: [PATCH] Remove SPOM_ prefix that NCBI Gene is adding to PomBase systematic names --- src/monarch_gene_mapping/cli_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/monarch_gene_mapping/cli_utils.py b/src/monarch_gene_mapping/cli_utils.py index ad50834e..9d75529b 100644 --- a/src/monarch_gene_mapping/cli_utils.py +++ b/src/monarch_gene_mapping/cli_utils.py @@ -281,6 +281,7 @@ def generate_gene_mappings() -> DataFrame: object_curie_prefix="NCBIGene:", predicate_id="skos:exactMatch", mapping_justification="semapv:UnspecifiedMatching") + pombase_to_ncbi['subject_id'] = pombase_to_ncbi['subject_id'].str.replace("SPOM_","") # remove SPOM_ prefix valid_pombase_genes = pd.read_csv("data/pombase/gene_IDs_names_products.tsv", sep="\t", usecols=["gene_systematic_id_with_prefix"]) # only keep rows where the subject_id is in valid_pombase_genes