def add_subjects_uri(self, rdf_tree, subjects):
    """Add subject URIs and definitions to matching ``dct:subject`` elements.

    For each entry of ``subjects`` that has a ``valueURI``, a ``subject``
    label and a ``subjectScheme``, the first ``dct:subject`` element whose
    nested ``skos:Concept`` carries that ``skos:prefLabel`` and scheme title
    gets the URI as its ``rdf:about`` attribute. When the entry also has a
    ``definition`` under ``subjectProps``, a ``skos:definition`` element is
    appended to the concept. Entries without a matching element are skipped.

    :param rdf_tree: Root element of the RDF tree; must provide ``nsmap``
        and ``xpath()``. It is modified in place.
    :param subjects: Iterable of subject dicts as described above.
    :returns: The same ``rdf_tree`` object.
    """
    rdf_about = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about"
    skos_definition_tag = "{http://www.w3.org/2004/02/skos/core#}definition"

    # XPath has no notion of a default namespace, so a ``None`` prefix coming
    # from ``nsmap`` must not be handed to the XPath/ElementPath engines.
    namespaces = {
        prefix: uri for prefix, uri in rdf_tree.nsmap.items() if prefix
    }

    # The label and scheme are bound as XPath variables rather than spliced
    # into the expression, so quotes in the values cannot break the query.
    query = (
        "//dct:subject[skos:Concept["
        "skos:prefLabel[text()=$label] and "
        "skos:inScheme/skos:ConceptScheme/dct:title[text()=$scheme]"
        "]]"
    )

    for subject in subjects:
        value_uri = subject.get("valueURI")
        subject_label = subject.get("subject")
        subject_scheme = subject.get("subjectScheme")
        if not (value_uri and subject_label and subject_scheme):
            continue

        matches = rdf_tree.xpath(
            query,
            namespaces=namespaces,
            label=subject_label,
            scheme=subject_scheme,
        )
        if not matches:
            # Nothing in the tree corresponds to this subject; indexing the
            # empty result would abort the whole serialization.
            continue
        subject_element = matches[0]

        # NOTE(review): in RDF/XML ``rdf:about`` normally sits on a node
        # element (here ``skos:Concept``) rather than on a property element
        # such as ``dct:subject`` -- confirm this is what consumers expect.
        subject_element.set(rdf_about, value_uri)

        # ``subjectProps`` may be present but null, hence the ``or {}``.
        definition = (subject.get("subjectProps") or {}).get("definition")
        if not definition:
            continue

        concept_elem = subject_element.find(
            ".//skos:Concept", namespaces=namespaces
        )
        if concept_elem is not None:
            skos_definition = etree.Element(skos_definition_tag)
            skos_definition.text = definition
            concept_elem.append(skos_definition)

    return rdf_tree
"""Add `rdf:about` attributes to within if missing.""" namespaces = rdf_tree.nsmap @@ -55,6 +100,11 @@ def transform_with_xslt(self, dc_record, **kwargs): dc_etree.tag = "{{{0}}}resource".format(dc_namespace) dcat_etree = self.xslt_transform_func(dc_etree).getroot() + # Add valueURI to subjects + subjects = dc_record.get("subjects", []) + if subjects: + dcat_etree = self.add_subjects_uri(dcat_etree, subjects) + # Add the identifier links for creators if missing dcat_etree = self.add_missing_creator_link(dcat_etree)