diff --git a/.gitignore b/.gitignore index 59adb45..607ba9f 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,8 @@ test/input/* !test/input/.keep test/output/* !test/output/.keep + +# input +input/* +!input/owl-files/ +!input/data/ diff --git a/makefile b/makefile new file mode 100644 index 0000000..76145a9 --- /dev/null +++ b/makefile @@ -0,0 +1,83 @@ +.PHONY=all chebi-subsets + +# All ------------------------------------------------------------------------------------------------------------------ +all: chebi-subsets + +# Analysis ------------------------------------------------------------------------------------------------------------- +input/analysis/: + mkdir -p $@ + +output/analysis/: + mkdir -p $@ + +# - ChEBI subsets +PART_MAPPINGS=loinc_release/Loinc_2.78/AccessoryFiles/PartFile/PartRelatedCodeMapping.csv +CHEBI_OWL=input/analysis/chebi.owl +CHEBI_MODULE=output/analysis/chebi_module.txt +CHEBI_OUT_BOT=output/analysis/chebi-subset-BOT.owl +CHEBI_OUT_MIREOT=output/analysis/chebi-subset-MIREOT.owl + +chebi-subsets: $(CHEBI_OUT_BOT) $(CHEBI_OUT_MIREOT) + +input/analysis/chebi.owl.gz: | input/analysis/ + wget -O $@ ftp://ftp.ebi.ac.uk/pub/databases/chebi/ontology/chebi.owl.gz + +input/analysis/chebi.owl: input/analysis/chebi.owl.gz + gunzip -c $< > $@ + rm $< + +# todo: bug fix for label comment: Alwyas shows up as ' # ,'. Alternatively, I could just not include the label comment. +$(CHEBI_MODULE): $(PART_MAPPINGS) | output/analysis/ + awk -F'",' '/ebi\.ac\.uk\/chebi/ { \ + split($$0, parts, "\""); \ + for (i=1; i<=NF; i++) { \ + if (parts[i] ~ /CHEBI:/) { \ + id = parts[i]; \ + gsub(".*CHEBI:", "http://purl.obolibrary.org/obo/CHEBI_", id); \ + gsub(",.*", "", id); \ + print id " # " parts[i+1] \ + } \ + } \ + }' $< > $@ + +# BOT: use the SLME (Syntactic Locality Module Extractor) to extract a bottom module +# - Source: https://robot.obolibrary.org/extract +# - The BOT, or BOTTOM, -module contains mainly the terms in the seed, plus all their super-classes and the +# inter-relations between them. The module is called BOT (or BOTTOM) because it takes a view from the BOTTOM of the +# class-hierarchy upwards. Modules of this type are typically of a medium size and should be used if there is a need to +# include all super-classes in the module. This is the most widely used module type - when in doubt, use this one. +$(CHEBI_OUT_BOT): $(CHEBI_OWL) $(CHEBI_MODULE) + robot extract --method BOT \ + --input $(CHEBI_OWL) \ + --term-file $(CHEBI_MODULE) \ + --output $@ + +# MIREOT: Minimum Information to Reference an External Ontology Term +# - Source: https://robot.obolibrary.org/extract +# - To specify upper and lower term files, use --upper-terms and --lower-terms. The upper terms are the upper boundaries +# of what will be extracted. If no upper term is specified, all terms up to the root (owl:Thing) will be returned. The +# lower term (or terms) is required; this is the limit to what will be extracted, e.g. no descendants of the lower term +# will be included in the result. +$(CHEBI_OUT_MIREOT): $(CHEBI_OWL) $(CHEBI_MODULE) + robot extract --method MIREOT \ + --input $(CHEBI_OWL) \ + --lower-terms $(CHEBI_MODULE) \ + --output $@ + +# todo: SPARQL TSV subset: Just the classes themselves? +# - might require a jinja SPARQL or something like it, populated by the module +#output/analysis/chebi-terms.tsv: $(CHEBI_OWL) | output/analysis/ +# robot query -i $< -q src/sparql/mappings.sparql $@ + +# todo: Subset? +# - Source: https://robot.obolibrary.org/extract +# - The subset method extracts a sub-ontology that contains only the seed terms (that you specify with --term and +# --term-file options) and the relations between them. This method uses the relation-graph to materialize the +# existential relations among the seed terms. Procedurally, the subset method materializes the input ontology and adds +# the inferred axioms to the input ontology. Then filters the ontology with the given seed terms. Finally, it reduces +# the filtered ontology to remove redundant subClassOf axioms. +#output/analysis/chebi-subset-subsetOnly.owl: $(CHEBI_OWL) $(CHEBI_MODULE) +# robot extract --method subset \ +# --input $(CHEBI_OWL) \ +# --term-file $(CHEBI_MODULE) \ +# --output $@