From 84279677ab0bf35ff19b0d34fdcab324e4076c5e Mon Sep 17 00:00:00 2001
From: yashlamba
Date: Wed, 27 Nov 2024 10:59:05 +0100
Subject: [PATCH] curation: update scoring config; add proxies

---
Review notes (below the "---" marker, so not part of the commit message):

* curators.py, _evaluator: bool is a subclass of int, so the bool check now
  runs before the int check. With the int check first, the configured
  "test_phrases_in_record": False entry was added to the score as 0 and the
  bool branch was unreachable, so the rule could never reject a record.
* ext.py: this patch adds no import lines to ext.py. NOTE(review): confirm
  that `cached_property`, `current_app` and `config` are already imported.
* The line structure of this patch was restored from its hunk headers;
  indentation of context lines is assumed. Run `git apply --check` before
  applying. Blob hashes on the curators.py "index" line predate the fix.

 site/zenodo_rdm/curation/config.py   |  7 +++++++
 site/zenodo_rdm/curation/curators.py | 22 +++++++++++++---------
 site/zenodo_rdm/curation/ext.py      |  8 ++++++++
 site/zenodo_rdm/curation/proxies.py  | 13 +++++++++++++
 site/zenodo_rdm/curation/rules.py    |  3 ---
 site/zenodo_rdm/curation/tasks.py    | 16 ++++++++++++----
 6 files changed, 53 insertions(+), 16 deletions(-)
 create mode 100644 site/zenodo_rdm/curation/proxies.py

diff --git a/site/zenodo_rdm/curation/config.py b/site/zenodo_rdm/curation/config.py
index dfe56840..9e0f47e2 100644
--- a/site/zenodo_rdm/curation/config.py
+++ b/site/zenodo_rdm/curation/config.py
@@ -20,5 +20,12 @@
 }
 """Rules to run for EU Curation."""
 
+CURATION_SCORES = {
+    "award_acronym_in_title": 5,
+    "award_acronym_in_description": 10,
+    "test_phrases_in_record": False,
+}
+"""Rule scores for EU Curation."""
+
 CURATION_ENABLE_EU_CURATOR = False
 """Controls whether to dry run EU Curation."""
diff --git a/site/zenodo_rdm/curation/curators.py b/site/zenodo_rdm/curation/curators.py
index 2a7814a4..81698e9d 100644
--- a/site/zenodo_rdm/curation/curators.py
+++ b/site/zenodo_rdm/curation/curators.py
@@ -7,17 +7,18 @@
 
 """Curators for ZenodoRDM Curation."""
 
-
 from flask import current_app
 from invenio_access.permissions import system_identity
 from invenio_rdm_records.proxies import current_record_communities_service
 from invenio_records_resources.services.uow import UnitOfWork
 
+from zenodo_rdm.curation.proxies import current_curation
+
 
 class BaseCurator:
     """Base Curator class."""
 
-    def __init__(self, dry=False, raise_exc=False) -> None:
+    def __init__(self, dry=False, raise_exc=False):
         """Constructor."""
         self.dry = dry
         self.raise_exc = raise_exc
@@ -59,13 +60,16 @@ def _evaluator(self, results):
         """Evaluate result for EC curation."""
         score = 0
         for rule, result in results.items():
-            # TODO put in config?
-            if rule == "award_in_title" and result:
-                score += 5
-            if rule == "award_in_description" and result:
-                score += 10
-            if rule == "test_word_record" and result:
-                return False
+            rule_score = current_curation.scores.get(rule)
+            if isinstance(rule_score, bool):  # bool subclasses int; test it first
+                if result:
+                    return rule_score
+                else:
+                    continue
+            elif isinstance(rule_score, int):
+                score += rule_score if result else 0
+            else:
+                raise ValueError("Unsupported score type configured.")
         return score >= current_app.config.get("CURATION_EU_CURATION_THRESHOLD")
 
     @property
diff --git a/site/zenodo_rdm/curation/ext.py b/site/zenodo_rdm/curation/ext.py
index 460b9e5c..3a9dfc68 100644
--- a/site/zenodo_rdm/curation/ext.py
+++ b/site/zenodo_rdm/curation/ext.py
@@ -34,3 +34,11 @@ def init_app(self, app):
         """Flask application initialization."""
         self.init_config(app)
         app.extensions["zenodo-curation"] = self
+
+    @cached_property
+    def scores(self):
+        """Return curation scores used for rules."""
+        return {
+            **config.CURATION_SCORES,
+            **current_app.config.get("CURATION_SCORES", {}),
+        }
diff --git a/site/zenodo_rdm/curation/proxies.py b/site/zenodo_rdm/curation/proxies.py
new file mode 100644
index 00000000..74caccd2
--- /dev/null
+++ b/site/zenodo_rdm/curation/proxies.py
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2023 CERN.
+#
+# ZenodoRDM is free software; you can redistribute it and/or modify
+# it under the terms of the MIT License; see LICENSE file for more details.
+
+"""Proxy objects for easier access to application objects."""
+
+from flask import current_app
+from werkzeug.local import LocalProxy
+
+current_curation = LocalProxy(lambda: current_app.extensions["zenodo-curation"])
diff --git a/site/zenodo_rdm/curation/rules.py b/site/zenodo_rdm/curation/rules.py
index 6af0580c..eba7b628 100644
--- a/site/zenodo_rdm/curation/rules.py
+++ b/site/zenodo_rdm/curation/rules.py
@@ -13,7 +13,6 @@
 
 def award_acronym_in_description(record):
     """Check if EU award name in record description."""
-
     award_service = current_service_registry.get("awards")
     description = record.metadata["description"]
     funding = record.metadata["funding"]
@@ -29,7 +28,6 @@ def award_acronym_in_description(record):
 
 def award_acronym_in_title(record):
     """Check if EU award name in record title."""
-
     award_service = current_service_registry.get("awards")
     title = record.metadata["title"]
     funding = record.metadata["funding"]
@@ -45,7 +43,6 @@ def award_acronym_in_title(record):
 
 def test_phrases_in_record(record):
     """Check if test words in record."""
-
     test_phrases = current_app.config.get("CURATION_TEST_PHRASES")
     record_data = record.metadata["title"] + " " + record.metadata["description"]
 
diff --git a/site/zenodo_rdm/curation/tasks.py b/site/zenodo_rdm/curation/tasks.py
index 50b46512..ee7a3970 100644
--- a/site/zenodo_rdm/curation/tasks.py
+++ b/site/zenodo_rdm/curation/tasks.py
@@ -1,3 +1,11 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2024 CERN.
+#
+# ZenodoRDM is free software; you can redistribute it and/or modify
+# it under the terms of the MIT License; see LICENSE file for more details.
+"""Tasks for curation."""
+
 from datetime import datetime, timedelta
 
 from celery import shared_task
@@ -5,6 +13,7 @@
 from invenio_access.permissions import system_identity
 from invenio_rdm_records.proxies import current_rdm_records_service as records_service
 from invenio_search.engine import dsl
+
 from zenodo_rdm.curation.curators import EURecordCurator
 
 
@@ -52,13 +61,12 @@ def run_eu_record_curation(since):
         try:
             result = curator.run(record=record)
             ctx["processed"] += 1
-        except Exception:
+            if result["evaluation"]:
+                ctx["approved"] += 1
+        except Exception as e:
             # NOTE Since curator's raise_exc is by default false, rules would not fail.
             # This catches failure due to other reasons
             ctx["failed"] += 1
-        if result["evaluation"]:
-            ctx["approved"] += 1
-
     current_app.logger.error(
         f"EU curation processed",
         extra=ctx,