From 2373db27d4e88b6d70dc902acb249d2e5ab38d0a Mon Sep 17 00:00:00 2001 From: yashlamba Date: Tue, 17 Dec 2024 13:54:15 +0100 Subject: [PATCH] curation: add grant agreement number checks; docstrings fix --- site/zenodo_rdm/curation/config.py | 12 ++++++-- site/zenodo_rdm/curation/rules.py | 48 +++++++++++++++++++++++------- 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/site/zenodo_rdm/curation/config.py b/site/zenodo_rdm/curation/config.py index a0b358fa..ac936fe9 100644 --- a/site/zenodo_rdm/curation/config.py +++ b/site/zenodo_rdm/curation/config.py @@ -13,7 +13,9 @@ award_acronym_in_additional_description, award_acronym_in_description, award_acronym_in_title, - community_name_award_acronym, + award_number_in_additional_description, + award_number_in_description, + community_data_award_acronym, contains_high_conf_keywords, contains_low_conf_keywords, eu_community_request, @@ -36,7 +38,9 @@ "award_acronym_in_additional_description": award_acronym_in_additional_description, "eu_community_request": eu_community_request, "eu_subcommunity_declined_request": eu_subcommunity_declined_request, - "community_name_award_acronym": community_name_award_acronym, + "community_data_award_acronym": community_data_award_acronym, + "award_number_in_additional_description": award_number_in_additional_description, + "award_number_in_description": award_number_in_description, } """Rules to run for EU Curation.""" @@ -53,7 +57,9 @@ "award_acronym_in_additional_description": 0, "eu_community_request": False, "eu_subcommunity_declined_request": False, - "community_name_award_acronym": 0, + "community_data_award_acronym": 0, + "award_number_in_additional_description": 0, + "award_number_in_description": 0, } """Rule scores for EU Curation (bool value implies direct approval/decline).""" diff --git a/site/zenodo_rdm/curation/rules.py b/site/zenodo_rdm/curation/rules.py index 944b011c..fd489fdf 100644 --- a/site/zenodo_rdm/curation/rules.py +++ b/site/zenodo_rdm/curation/rules.py @@ -16,16 +16,22 @@ from invenio_search.engine import dsl -def _award_acronym_number_in_text(award, text): - """Check for award number/acronym in data.""" - if award.get("acronym") and (award.get("acronym") in text): +def _award_acronym_in_text(award, text): + """Check for award acronym in data.""" + if award.get("acronym") and (award.get("acronym").lower() in text.lower()): return True - if award.get("number") and (award.get("number") in text): + return False + + +def _award_number_in_text(award, text): + """Check for award number in data.""" + if award.get("number") and (str(award.get("number")) in text): return True return False def _get_ec_awards(record): + """Get all EC funded awards of record.""" award_service = current_service_registry.get("awards") awards = [] funding = record.metadata.get("funding", []) @@ -42,7 +48,17 @@ def award_acronym_in_description(record): if description := record.metadata.get("description"): awards = _get_ec_awards(record) for award in awards: - if _award_acronym_number_in_text(award, description): + if _award_acronym_in_text(award, description): + return True + return False + + +def award_number_in_description(record): + """Check if EU award number in record description.""" + if description := record.metadata.get("description"): + awards = _get_ec_awards(record) + for award in awards: + if _award_number_in_text(award, description): return True return False @@ -53,7 +69,7 @@ def award_acronym_in_title(record): awards = _get_ec_awards(record) for award in awards: - if _award_acronym_number_in_text(award, title): + if _award_acronym_in_text(award, title): return True return False @@ -154,7 +170,19 @@ def award_acronym_in_additional_description(record): awards = _get_ec_awards(record) for award in awards: - if _award_acronym_number_in_text(award, record_data): + if _award_acronym_in_text(award, record_data): + return True + return False + + +def award_number_in_additional_description(record): + """Check if EU award number in record additional description.""" + additional_descriptions = record.metadata.get("additional_descriptions", []) + record_data = " ".join([x.get("description", "") for x in additional_descriptions]) + + awards = _get_ec_awards(record) + for award in awards: + if _award_number_in_text(award, record_data): return True return False @@ -224,8 +252,8 @@ def eu_subcommunity_declined_request(record): return False -def community_name_award_acronym(record): - """Check if award acronym in community name.""" +def community_data_award_acronym(record): + """Check if award acronym in community data.""" comm_text = "" for comm in record.parent.communities: comm_text += comm.metadata.get("title", "") @@ -234,6 +262,6 @@ def community_name_award_acronym(record): if comm_text: awards = _get_ec_awards(record) for award in awards: - if _award_acronym_number_in_text(award, comm_text): + if _award_acronym_in_text(award, comm_text): return True return False