From 73d66547917effb7ed11def3f0d24d41fa3a65b0 Mon Sep 17 00:00:00 2001 From: speckij Date: Tue, 19 Nov 2024 13:01:25 +0100 Subject: [PATCH] fix `properties->updated` + cleanup code --- pygeoapi-config.yml | 23 +++ pygeoapi/provider/csw_facade_dcat.py | 264 +++++---------------------- 2 files changed, 67 insertions(+), 220 deletions(-) diff --git a/pygeoapi-config.yml b/pygeoapi-config.yml index b8dc96e01..18bc71407 100644 --- a/pygeoapi-config.yml +++ b/pygeoapi-config.yml @@ -177,3 +177,26 @@ resources: - type: record name: CSWFacadeDCAT data: https://sdi.eea.europa.eu/catalogue/srv/eng/csw + BGS.uk: + type: collection + title: OGC Catalogue service for the web (CSW) + description: metadata records from www.bgs.ac.uk/ + keywords: + - observations + - monitoring + links: + - type: text/html + rel: canonical + title: documentation + href: www.bgs.ac.uk/ + hreflang: en-US + extents: + spatial: + bbox: [ -180,-90,180,90 ] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + linked-data: + item_template: schemas/dcat/dcat-template.jsonld + providers: + - type: record + name: CSWFacadeDCAT + data: https://metadata.bgs.ac.uk/geonetwork/srv/eng/csw diff --git a/pygeoapi/provider/csw_facade_dcat.py b/pygeoapi/provider/csw_facade_dcat.py index 514272fc1..83c044e18 100644 --- a/pygeoapi/provider/csw_facade_dcat.py +++ b/pygeoapi/provider/csw_facade_dcat.py @@ -33,8 +33,9 @@ from owslib import fes from owslib.csw import CatalogueServiceWeb from owslib.ows import ExceptionReport +from pygeoapi.provider.csw_facade import CSWFacadeProvider -from pygeoapi.provider.base import (BaseProvider, ProviderConnectionError, +from pygeoapi.provider.base import (ProviderConnectionError, ProviderInvalidQueryError, ProviderItemNotFoundError, ProviderQueryError) @@ -43,182 +44,9 @@ LOGGER = logging.getLogger(__name__) -class CSWFacadeDCATProvider(BaseProvider): +class CSWFacadeDCATProvider(CSWFacadeProvider): """CSW Facade provider""" - def __init__(self, provider_def): - """ - Initialize object - - :param provider_def: provider definition - - :returns: pygeoapi.provider.csv_.CSWFacadeProvider - """ - - super().__init__(provider_def) - - self.record_mappings = { - 'type': ('dc:type', 'type'), - 'title': ('dc:title', 'title'), - 'description': ('dct:abstract', 'abstract'), - 'keywords': ('dc:subject', 'subjects'), - 'date': ('dc:date', 'date'), - 'created': ('dct:created', 'created'), - 'updated': ('dct:modified', 'modified'), - 'rights': ('dc:rights', 'rights'), - 'language': ('dc:language', 'language') - } - - self._fields = {} - self.get_fields() - - def get_fields(self): - """ - Get provider field information (names, types) - - :returns: dict of fields - """ - - if not self._fields: - date_fields = ['date', 'created', 'updated'] - - for key in self.record_mappings.keys(): - LOGGER.debug(f'key: {key}') - self._fields[key] = {'type': 'string'} - - if key in date_fields: - self._fields[key]['format'] = 'date-time' - - return self._fields - - @crs_transform - def query(self, offset=0, limit=10, resulttype='results', - bbox=[], datetime_=None, properties=[], sortby=[], - select_properties=[], skip_geometry=False, q=None, **kwargs): - """ - CSW GetRecords query - - :param offset: starting record to return (default 0) - :param limit: number of records to return (default 10) - :param resulttype: return results or hit limit (default results) - :param bbox: bounding box [minx,miny,maxx,maxy] - :param datetime_: temporal (datestamp or extent) - :param properties: list of tuples (name, value) - :param sortby: list of dicts (property, order) - :param select_properties: list of property names - :param skip_geometry: bool of whether to skip geometry (default False) - :param q: full-text search term(s) - - :returns: `dict` of GeoJSON FeatureCollection - """ - - constraints = [] - - response = { - 'type': 'FeatureCollection', - 'features': [] - } - - LOGGER.debug('Processing query parameters') - - if bbox: - LOGGER.debug('Processing bbox parameter') - LOGGER.debug('Swapping coordinate axis order from xy to yx') - bbox2 = [bbox[1], bbox[0], bbox[3], bbox[2]] - constraints.append(fes.BBox(bbox2)) - - if datetime_: - date_property = self.record_mappings[self.time_field][0] - LOGGER.debug('Processing datetime parameter') - if '/' in datetime_: - begin, end = datetime_.split('/') - LOGGER.debug('Processing time extent') - constraints.append(fes.PropertyIsGreaterThan(date_property, begin)) # noqa - constraints.append(fes.PropertyIsLessThan(date_property, end)) - else: - LOGGER.debug('Processing time instant') - constraints.append(fes.PropertyIsEqualTo(date_property, - datetime_)) - - for p in properties: - LOGGER.debug(f'Processing property {p} parameter') - if p[0] not in list(self.record_mappings.keys()): - msg = f'Invalid property: {p[0]}' - LOGGER.error(msg) - raise ProviderInvalidQueryError(user_msg=msg) - - prop = self.record_mappings[p[0]][0] - constraints.append(fes.PropertyIsEqualTo(prop, p[1])) - - if q is not None: - LOGGER.debug('Processing q parameter') - anytext = fes.PropertyIsLike(propertyname='csw:AnyText', literal=q, - escapeChar='\\', singleChar='?', - wildCard='*') - constraints.append(anytext) - - if sortby: - LOGGER.debug('Processing sortby parameter') - sorts = [] - sort_orders = { - '+': 'ASC', - '-': 'DESC' - } - for s in sortby: - sorts.append(fes.SortProperty( - self.record_mappings[s['property']][0], - sort_orders[s['order']])) - sortby2 = fes.SortBy(sorts) - else: - sortby2 = None - - if len(constraints) > 1: - constraints = [fes.And(constraints)] - - LOGGER.debug(f'Querying CSW: {self.data}') - csw = self._get_csw() - try: - csw.getrecords2(esn='full', maxrecords=limit, startposition=offset, - constraints=constraints, sortby=sortby2, - resulttype=resulttype) - except ExceptionReport as err: - msg = f'CSW error {err}' - LOGGER.error(msg) - raise ProviderQueryError(msg) - - response['numberMatched'] = csw.results['matches'] - response['numberReturned'] = csw.results['returned'] - LOGGER.debug(f"Found {response['numberMatched']} records") - LOGGER.debug(f"Returned {response['numberReturned']} records") - - LOGGER.debug('Building result set') - for record in csw.records.values(): - response['features'].append(self._owslibrecord2record(record)) - - return response - - @crs_transform - def get(self, identifier, **kwargs): - """ - CSW GetRecordById query - - :param identifier: feature id - - :returns: dict of single GeoJSON feature - """ - - csw = self._get_csw() - csw.getrecordbyid([identifier], esn='full') - - if not csw.records: - err = f'item {identifier} not found' - LOGGER.error(err) - raise ProviderItemNotFoundError(err) - - record_key = list(csw.records.keys())[0] - - return self._owslibrecord2record(csw.records[record_key]) - def _get_csw(self) -> CatalogueServiceWeb: """ Helper function to lazy load a CSW @@ -232,50 +60,29 @@ def _get_csw(self) -> CatalogueServiceWeb: except Exception as err: err = f'CSW connection error: {err}' - LOGGER.error(err) raise ProviderConnectionError(err) - - def _gen_getrecordbyid_link(self, identifier: str, - csw_version: str = '2.0.2') -> dict: - """ - Helper function to generate a CSW GetRecordById URL - - :param identifier: `str` of record identifier - :param csw_version: `str` of CSW version (default is `2.0.2`) - - :returns: `dict` of link object of GetRecordById URL - """ - - params = { - 'service': 'CSW', - 'version': csw_version, - 'request': 'GetRecordById', - 'id': identifier - } - - return { - 'rel': 'alternate', - 'type': 'application/xml', - 'title': 'This document as XML', - 'href': f'{self.data}?{urlencode(params)}', - } - def _owslibrecord2record(self, record): LOGGER.debug(f'Transforming {record.identifier}') + time = None + # conform to https://ogcincubator.github.io/bblocks-ogcapi-records/build/annotated/api/records/v1/schemas/time/schema.yaml + if record.date: + if ":" in record.date: + time = { + "timestamp": record.date if record.date[-1] == "Z" else record.date + "Z" + } + else: + time = { + "date": record.date + } + feature = { 'id': record.identifier, 'type': 'Feature', 'geometry': None, - 'time': {"timestamp": record.date} if record.date is not None else None, + 'time': time, 'properties': {}, 'links': [ - self._gen_getrecordbyid_link(record.identifier), - { - 'rel': 'alternate', - 'type': 'application/json+ld', - 'title': 'This document as JSON-LD (GeoDCAT-upliftable)', - 'href': f'./{record.identifier}?f=jsonld', - } + self._gen_getrecordbyid_link(record.identifier) ] } @@ -283,7 +90,12 @@ def _owslibrecord2record(self, record): for key, value in self.record_mappings.items(): prop_value = getattr(record, value[1]) if prop_value not in [None, [], '']: - feature['properties'][key] = prop_value + if key == "language": + feature['properties'][key] = {"code": prop_value} + elif key == "updated": + feature['properties'][key] = self._format_updated(prop_value) + else: + feature['properties'][key] = prop_value if record.bbox is not None: LOGGER.debug('Adding bbox') @@ -298,25 +110,37 @@ def _owslibrecord2record(self, record): if record.references: LOGGER.debug('Adding references as links') for link in record.references: - feature['links'].append({ - 'title': link['scheme'], - 'href': link['url'] - }) + if link['url']: + feature['links'].append({ + 'rel': 'alternate', + 'title': link['scheme'] if link['scheme'] else "unknown", + 'href': link['url'] + }) if record.uris: LOGGER.debug('Adding URIs as links') for link in record.uris: - feature['links'].append({ - 'title': link['name'], - 'href': link['url'] - }) + if link['url']: + feature['links'].append({ + 'rel': 'alternate', + 'title': link['name'] if link['name'] else "unknown", + 'href': link['url'] + }) if record.rights: right = "" for link in record.rights: - right += link + if link: + right += link feature["properties"]["rights"] = right return feature + def _format_updated(self, value: str): + # conform to json-schema type: `date-time` + if ":" not in value: + return value + "T00:00:00Z" + else: + return value + def __repr__(self): return f' {self.data}'