From ffa2226aee5fb3d962e5b24e21e7feee8c11155f Mon Sep 17 00:00:00 2001 From: James McLaughlin Date: Mon, 19 Aug 2024 01:34:22 +0100 Subject: [PATCH] skip obsolete terms in ols ingest, add mondo and doid --- 01_ingest/grebi_ingest_ols/src/main.rs | 25 ++++++++++++++++++------- configs/datasource_configs/ols.json | 2 +- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/01_ingest/grebi_ingest_ols/src/main.rs b/01_ingest/grebi_ingest_ols/src/main.rs index 342e9a9..be55b2e 100644 --- a/01_ingest/grebi_ingest_ols/src/main.rs +++ b/01_ingest/grebi_ingest_ols/src/main.rs @@ -24,7 +24,10 @@ struct Args { ontologies:String, #[arg(long)] - defining_only:bool + defining_only:bool, + + #[arg(long)] + skip_obsolete:bool } @@ -57,14 +60,14 @@ fn main() { } json.begin_array().unwrap(); while json.has_next().unwrap() { - read_ontology(&mut json, &mut output_nodes, &datasource_name, &ontology_whitelist, args.defining_only); + read_ontology(&mut json, &mut output_nodes, &datasource_name, &ontology_whitelist, args.defining_only, args.skip_obsolete); } json.end_array().unwrap(); json.end_object().unwrap(); } -fn read_ontology(json: &mut JsonStreamReader>>, output_nodes: &mut BufWriter, datasource_name: &str, ontology_whitelist:&HashSet, defining_only:bool) { +fn read_ontology(json: &mut JsonStreamReader>>, output_nodes: &mut BufWriter, datasource_name: &str, ontology_whitelist:&HashSet, defining_only:bool, skip_obsolete:bool) { json.begin_object().unwrap(); @@ -128,11 +131,11 @@ fn read_ontology(json: &mut JsonStreamReader>>, output_n loop { if key.eq("classes") { - read_entities(json, output_nodes, &datasource, "ols:Class", defining_only); + read_entities(json, output_nodes, &datasource, "ols:Class", defining_only, skip_obsolete); } else if key.eq("properties") { - read_entities(json, output_nodes, &datasource, "ols:Property", defining_only); + read_entities(json, output_nodes, &datasource, "ols:Property", defining_only, skip_obsolete); } else if key.eq("individuals") { - read_entities(json, output_nodes, &datasource, "ols:Individual", defining_only); + read_entities(json, output_nodes, &datasource, "ols:Individual", defining_only, skip_obsolete); } else { panic!(); } @@ -147,7 +150,7 @@ fn read_ontology(json: &mut JsonStreamReader>>, output_n } -fn read_entities(json: &mut JsonStreamReader>>, output_nodes: &mut BufWriter, datasource:&String, grebitype:&str, defining_only:bool) { +fn read_entities(json: &mut JsonStreamReader>>, output_nodes: &mut BufWriter, datasource:&String, grebitype:&str, defining_only:bool, skip_obsolete:bool) { json.begin_array().unwrap(); while json.has_next().unwrap() { let mut val:Value = read_value(json); @@ -161,6 +164,14 @@ fn read_entities(json: &mut JsonStreamReader>>, output_n } } + if skip_obsolete { + if obj.contains_key("ols:isObsolete") { + if get_string_values(obj.get("ols:isObsolete").unwrap()).iter().next().unwrap().eq(&"true") { + continue; + } + } + } + if grebitype.eq("ols:Property") { let qualified_safe_label = { diff --git a/configs/datasource_configs/ols.json b/configs/datasource_configs/ols.json index e66a494..181610a 100644 --- a/configs/datasource_configs/ols.json +++ b/configs/datasource_configs/ols.json @@ -6,7 +6,7 @@ "ingest_files": ["/nfs/production/parkinso/spot/grebi/ontologies.json.gz"], "ingest_script": "./target/release/grebi_ingest_ols", "ingest_args": [ - { "name": "--ontologies", "value": "efo,mp,hp,go,ro,iao,uberon,pato,oba,chebi,bspo,iao,obi,bfo,cob,cl,so,eco,pr,ncbitaxon,oio,iao,biolink" } + { "name": "--ontologies", "value": "efo,mp,hp,go,ro,iao,uberon,pato,oba,chebi,bspo,iao,obi,bfo,cob,cl,so,eco,pr,ncbitaxon,oio,iao,biolink,mondo,doid" } ] } ]