Skip to content

Commit

Permalink
curation: cleanup award processing; minor fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
yashlamba authored and slint committed Dec 18, 2024
1 parent 80cc7bd commit 0725f17
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 59 deletions.
6 changes: 3 additions & 3 deletions site/zenodo_rdm/curation/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
community_name_award_acronym,
contains_high_conf_keywords,
contains_low_conf_keywords,
eu_community_declined_request,
eu_community_request,
eu_subcommunity_declined_request,
published_before_award_start,
test_phrases_in_record,
Expand All @@ -34,7 +34,7 @@
"additional_desc_contains_low_conf_keywords": additional_desc_contains_low_conf_keywords,
"additional_desc_contains_high_conf_keywords": additional_desc_contains_high_conf_keywords,
"award_acronym_in_additional_description": award_acronym_in_additional_description,
"eu_community_declined_request": eu_community_declined_request,
"eu_community_request": eu_community_request,
"eu_subcommunity_declined_request": eu_subcommunity_declined_request,
"community_name_award_acronym": community_name_award_acronym,
}
Expand All @@ -51,7 +51,7 @@
"additional_desc_contains_low_conf_keywords": 0,
"additional_desc_contains_high_conf_keywords": 0,
"award_acronym_in_additional_description": 0,
"eu_community_declined_request": False,
"eu_community_request": False,
"eu_subcommunity_declined_request": False,
"community_name_award_acronym": 0,
}
Expand Down
103 changes: 47 additions & 56 deletions site/zenodo_rdm/curation/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,39 +16,45 @@
from invenio_search.engine import dsl


def award_acronym_in_description(record):
"""Check if EU award name in record description."""
award_service = current_service_registry.get("awards")
description = record.metadata.get("description")
if not description:
return False
def _award_acronym_number_in_text(award, text):
"""Check for award number/acronym in data."""
if award.get("acronym") and (award.get("acronym") in text):
return True
if award.get("number") and (award.get("number") in text):
return True
return False


def _get_ec_awards(record):
    """Collect the resolved awards attached to a record's EC funding entries.

    Only funding entries whose funder id is ``00k4n6c32`` are considered
    (presumably the European Commission's funder id, going by this helper's
    name -- confirm), and only those that carry an award id.

    :param record: Record whose ``metadata["funding"]`` list is inspected.
    :returns: List of award records resolved through the ``awards`` service's
        record class; empty when nothing matches.
    """
    award_service = current_service_registry.get("awards")
    awards = []
    for funding in record.metadata.get("funding", []):
        if funding["funder"].get("id") != "00k4n6c32":
            continue
        # Funding entries may have no award, or an award without an id.
        if award_id := funding.get("award", {}).get("id"):
            awards.append(award_service.record_cls.pid.resolve(award_id))
    return awards


def award_acronym_in_description(record):
    """Check if an EC award acronym or number appears in the record description."""
    description = record.metadata.get("description")
    if not description:
        # No description to search; awards are not even looked up.
        return False
    return any(
        _award_acronym_number_in_text(award, description)
        for award in _get_ec_awards(record)
    )


def award_acronym_in_title(record):
    """Check if an EC award acronym or number appears in the record title.

    :param record: Record providing ``metadata["title"]`` and funding data.
    :returns: ``True`` as soon as one EC award matches the title, else ``False``.
    """
    title = record.metadata["title"]

    # Award lookup and matching are delegated to the shared helpers so every
    # award rule applies the same funder filter and matching semantics.
    return any(
        _award_acronym_number_in_text(award, title)
        for award in _get_ec_awards(record)
    )


Expand All @@ -67,18 +73,13 @@ def test_phrases_in_record(record):

def published_before_award_start(record):
    """Check if the record was created before the start date of an EC award.

    The comparison uses the record's ``created`` timestamp against each
    award's ``start_date`` (parsed with ``arrow``). Awards without a start
    date are ignored.

    :param record: Record providing ``created`` and funding data.
    :returns: ``True`` if the record predates at least one award start date,
        ``False`` otherwise.
    """
    for award in _get_ec_awards(record):
        start_date = award.get("start_date")
        if not start_date:
            continue
        award_start = arrow.get(start_date).datetime.timestamp()
        if record.created.timestamp() < award_start:
            return True
    return False


Expand Down Expand Up @@ -148,23 +149,17 @@ def additional_desc_contains_low_conf_keywords(record):

def award_acronym_in_additional_description(record):
    """Check if an EC award acronym or number appears in additional descriptions.

    All ``description`` values of ``metadata["additional_descriptions"]`` are
    joined with single spaces and searched as one text.

    :param record: Record providing additional descriptions and funding data.
    :returns: ``True`` if any EC award matches the joined text, else ``False``.
    """
    additional_descriptions = record.metadata.get("additional_descriptions", [])
    record_data = " ".join(
        entry.get("description", "") for entry in additional_descriptions
    )

    return any(
        _award_acronym_number_in_text(award, record_data)
        for award in _get_ec_awards(record)
    )


def eu_community_declined_request(record):
def eu_community_request(record):
"""Check if record was rejected from EU community."""
community_requests = dsl.Q(
"bool",
Expand All @@ -188,6 +183,8 @@ def eu_community_declined_request(record):
results = current_requests_service.search(system_identity, extra_filter=finalq)

for result in results:
# return true if there was a declined request or an existing open request
# as we respond to open requests ourselves.
if result["is_closed"] and result["status"] == "declined":
return True
if result["is_open"] and not result["is_expired"]:
Expand Down Expand Up @@ -216,10 +213,10 @@ def eu_subcommunity_declined_request(record):
results = current_requests_service.search(system_identity, extra_filter=finalq)

for result in results:
receiver = current_communities.service.record_cls.pid.resolve(
community = current_communities.service.record_cls.pid.resolve(
result["receiver"]["community"]
)
if receiver.parent and str(receiver.parent.id) == current_app.config.get(
if community.parent and str(community.parent.id) == current_app.config.get(
"EU_COMMUNITY_UUID"
):
if result["status"] == "declined":
Expand All @@ -232,17 +229,11 @@ def community_name_award_acronym(record):
comm_text = ""
for comm in record.parent.communities:
comm_text += comm.metadata.get("title", "")
comm_text += comm.metadata.get("page", "")
comm_text += " " + comm.metadata.get("page", "")

if comm_text:
award_service = current_service_registry.get("awards")
funding = record.metadata.get("funding", [])
for f in funding:
if f["funder"].get("id") == "00k4n6c32":
if award_id := f.get("award", {}).get("id"):
award = award_service.record_cls.pid.resolve(award_id)
if award.get("acronym") and (
award.get("acronym").lower() in comm_text.lower()
):
return True
awards = _get_ec_awards(record)
for award in awards:
if _award_acronym_number_in_text(award, comm_text):
return True
return False

0 comments on commit 0725f17

Please sign in to comment.