Skip to content

Commit

Permalink
dcat: updated serializer for subject info
Browse files Browse the repository at this point in the history
  • Loading branch information
0einstein0 committed Dec 3, 2024
1 parent dfd6dc3 commit 1d5ce7d
Showing 1 changed file with 50 additions and 0 deletions.
50 changes: 50 additions & 0 deletions site/zenodo_rdm/serializers/dcat.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,51 @@ def __init__(self, **options):
"""Constructor."""
super().__init__(**options)

def add_subjects_uri(self, rdf_tree, subjects):
    """Add subject URIs and definitions to the matching dct:subject elements.

    For every subject that carries a ``valueURI``, a ``subject`` label and a
    ``subjectScheme``, the first ``dct:subject`` element whose nested
    ``skos:Concept`` has that ``skos:prefLabel`` and that scheme title is
    looked up. Its ``rdf:about`` attribute is set to the URI and, when
    ``subjectProps`` holds a ``definition``, a ``skos:definition`` child is
    appended to the concept. Subjects without a matching element are skipped.

    :param rdf_tree: lxml element to search; it is modified in place.
    :param subjects: Iterable of subject dicts (keys read: ``valueURI``,
        ``subject``, ``subjectScheme``, ``subjectProps``).
    :returns: The same ``rdf_tree`` object.
    """
    rdf_about = "{http://www.w3.org/1999/02/22-rdf-syntax-ns#}about"
    skos_definition = "{http://www.w3.org/2004/02/skos/core#}definition"

    # The label and scheme are passed as XPath variables instead of being
    # interpolated into the expression: values containing quotes (e.g.
    # "Alzheimer's disease") would otherwise produce an invalid or injectable
    # expression.
    subject_xpath = """
        //dct:subject[
            skos:Concept[
                skos:prefLabel[text()=$label]
                and skos:inScheme/skos:ConceptScheme/dct:title[text()=$scheme]
            ]
        ]
    """

    # XPath has no default namespace, and lxml rejects an empty/None prefix,
    # so drop it from the mapping taken from the tree.
    namespaces = {
        prefix: uri for prefix, uri in rdf_tree.nsmap.items() if prefix
    }

    for subject in subjects or []:
        value_uri = subject.get("valueURI")
        subject_label = subject.get("subject")
        subject_scheme = subject.get("subjectScheme")
        if not (value_uri and subject_label and subject_scheme):
            continue

        matches = rdf_tree.xpath(
            subject_xpath,
            namespaces=namespaces,
            label=subject_label,
            scheme=subject_scheme,
        )
        if not matches:
            # No dct:subject was generated for this subject; nothing to annotate.
            continue
        subject_element = matches[0]

        # Add the valueURI to the dct:subject element as rdf:about
        subject_element.set(rdf_about, value_uri)

        # "subjectProps" may be missing or explicitly None.
        definition = (subject.get("subjectProps") or {}).get("definition")
        if not definition:
            continue

        concept_elem = subject_element.find(
            ".//skos:Concept", namespaces=namespaces
        )
        # Compare against None: the truth value of an lxml element reflects
        # whether it has children, not whether it was found.
        if concept_elem is not None:
            definition_elem = etree.Element(skos_definition)
            definition_elem.text = definition
            concept_elem.append(definition_elem)

    return rdf_tree

def add_missing_creator_link(self, rdf_tree):
"""Add `rdf:about` attributes to <rdf:Description> within <dct:creator> if missing."""
namespaces = rdf_tree.nsmap
Expand Down Expand Up @@ -55,6 +100,11 @@ def transform_with_xslt(self, dc_record, **kwargs):
dc_etree.tag = "{{{0}}}resource".format(dc_namespace)
dcat_etree = self.xslt_transform_func(dc_etree).getroot()

# Add valueURI to subjects
subjects = dc_record.get("subjects", [])
if subjects:
dcat_etree = self.add_subjects_uri(dcat_etree, subjects)

# Add the identifier links for creators if missing
dcat_etree = self.add_missing_creator_link(dcat_etree)

Expand Down

0 comments on commit 1d5ce7d

Please sign in to comment.