Skip to content

Commit

Permalink
exporters: added dcat serializer
Browse files Browse the repository at this point in the history
  • Loading branch information
0einstein0 committed Dec 3, 2024
1 parent ea37f28 commit dfd6dc3
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 0 deletions.
9 changes: 9 additions & 0 deletions invenio.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -1118,6 +1118,15 @@ APP_RDM_RECORD_EXPORTERS = {
"params": {},
"content-type": "application/vnd.datacite.datacite+xml",
"filename": "{id}.xml",
},
"dcat-ap": {
"name": _("DCAT"),
"serializer": (
"zenodo_rdm.serializers:ZenodoDCATSerializer"
),
"params": {},
"content-type": "application/dcat+xml",
"filename": "{id}.xml",
},
"cff": {
"name": _("Citation File Format"),
Expand Down
2 changes: 2 additions & 0 deletions site/zenodo_rdm/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
from .cff import ZenodoCFFSerializer
from .codemeta import ZenodoCodemetaSerializer
from .datacite import ZenodoDataciteJSONSerializer, ZenodoDataciteXMLSerializer
from .dcat import ZenodoDCATSerializer

# Names exported by ``from zenodo_rdm.serializers import *``; every serializer
# class imported above that is meant to be public must be listed here.
__all__ = (
    "ZenodoBibtexSerializer",
    "ZenodoCodemetaSerializer",
    "ZenodoDataciteJSONSerializer",
    "ZenodoDataciteXMLSerializer",
    "ZenodoCFFSerializer",
    "ZenodoDCATSerializer",
)
69 changes: 69 additions & 0 deletions site/zenodo_rdm/serializers/dcat.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.
"""Zenodo dcat serializer."""


import idutils
from datacite import schema43
from invenio_rdm_records.resources.serializers.dcat import DCATSerializer
from lxml import etree


class ZenodoDCATSerializer(DCATSerializer):
    """Zenodo DCAT Serializer.

    Extends the base DCAT serializer so that every creator description that
    carries a resolvable ``dct:identifier`` but no ``rdf:about`` attribute
    gets one, pointing at the URL form of that identifier.
    """

    # Fallback namespace URIs, used when the transformed tree does not declare
    # the ``rdf``/``dct`` prefixes on its root element.
    _RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
    _DCT_NS = "http://purl.org/dc/terms/"

    def __init__(self, **options):
        """Constructor."""
        super().__init__(**options)

    @staticmethod
    def _identifier_to_url(identifier):
        """Return the first non-empty URL ``idutils`` builds for ``identifier``.

        Every scheme detected for the identifier is tried in order; ``None`` is
        returned when no scheme yields a URL.
        """
        for scheme in idutils.detect_identifier_schemes(identifier):
            url = idutils.to_url(identifier, scheme)
            if url:
                return url
        return None

    def add_missing_creator_link(self, rdf_tree):
        """Add `rdf:about` attributes to <rdf:Description> within <dct:creator> if missing.

        :param rdf_tree: root lxml element of the DCAT document; modified in
            place.
        :returns: the same ``rdf_tree`` element.
        """
        # lxml's XPath rejects a default (``None``) namespace prefix, so drop it
        # and make sure the two prefixes used below are always resolvable.
        namespaces = {
            prefix: uri for prefix, uri in rdf_tree.nsmap.items() if prefix
        }
        namespaces.setdefault("rdf", self._RDF_NS)
        namespaces.setdefault("dct", self._DCT_NS)

        creators = rdf_tree.xpath(
            "//dct:creator/rdf:Description[not(@rdf:about)]", namespaces=namespaces
        )

        for description in creators:
            identifier_elem = description.find("dct:identifier", namespaces)
            if identifier_elem is None:
                continue

            # ``.text`` is None for an empty element such as <dct:identifier/>.
            identifier = (identifier_elem.text or "").strip()
            if not identifier:
                continue

            rdf_about_url = self._identifier_to_url(identifier)
            if rdf_about_url:
                description.set(
                    "{{{0}}}about".format(self._RDF_NS),
                    rdf_about_url,
                )
        return rdf_tree

    def transform_with_xslt(self, dc_record, **kwargs):
        """Transform record with XSLT and add rdf:about.

        :param dc_record: mapping accepted by ``schema43.dump_etree``; an
            optional ``"_files"`` entry lists the files to inject.
        :param kwargs: accepted for interface compatibility; not used here.
        :returns: root element of the resulting DCAT tree.
        """
        # Transform with base class functionality
        dc_etree = schema43.dump_etree(dc_record)
        dc_namespace = schema43.ns[None]
        # Re-tag the root as a namespaced <resource> element before running
        # the XSLT on it.
        dc_etree.tag = "{{{0}}}resource".format(dc_namespace)
        dcat_etree = self.xslt_transform_func(dc_etree).getroot()

        # Add the identifier links for creators if missing
        dcat_etree = self.add_missing_creator_link(dcat_etree)

        # Inject files in results (since the XSLT can't do that by default)
        files_data = dc_record.get("_files", [])
        if files_data:
            self._add_files(
                root=dcat_etree,
                files=files_data,
            )

        return dcat_etree

0 comments on commit dfd6dc3

Please sign in to comment.