From af2116e4beeab8a9a7763d4f8823f10d35afabac Mon Sep 17 00:00:00 2001 From: Ian Nesbitt Date: Tue, 27 Aug 2024 21:04:14 +0000 Subject: [PATCH] adding some logic to address #26 --- mnonboard/cli.py | 4 +-- mnonboard/cn.py | 23 +++++++++++++++-- mnonboard/info_chx.py | 19 ++++++++------ mnonboard/utils.py | 59 +++++++++++++++++++++++++++++++++++++++---- 4 files changed, 88 insertions(+), 17 deletions(-) diff --git a/mnonboard/cli.py b/mnonboard/cli.py index 7fcbfbc4..02fd9214 100644 --- a/mnonboard/cli.py +++ b/mnonboard/cli.py @@ -24,7 +24,7 @@ def run(cfg): cfg['token'] = info_chx.req_input('Please enter your DataONE authentication token: ') os.environ['D1_AUTH_TOKEN'] = cfg['token'] cfg['cert_loc'] = CN_CERT_LOC[cfg['mode']] - DC = cn.init_client(cn_url=cfg['cn_url'], auth_token=cfg['token']) + client = cn.init_client(cn_url=cfg['cn_url'], auth_token=cfg['token']) if cfg['info'] == 'user': # do the full user-driven info gathering process ufields = info_chx.user_input() @@ -45,7 +45,7 @@ def run(cfg): # add a subject for owner and submitter (may not be necessary if they exist already) # add subject for technical contact (step 6) val = fields[f] if f not in 'contact_subject' else fields['node'][f] - name = utils.get_or_create_subj(loc=loc, value=val, cn_url=cfg['cn_url'], title=f) + name = utils.get_or_create_subj(loc=loc, value=val, client=client, title=f) # store this for a few steps later names[val] = name # set the update schedule and set the state to up diff --git a/mnonboard/cn.py b/mnonboard/cn.py index 4c5ca202..bf522f63 100644 --- a/mnonboard/cn.py +++ b/mnonboard/cn.py @@ -1,9 +1,9 @@ -from os import environ +import logging from d1_client.cnclient import CoordinatingNodeClient from d1_common.types.dataoneTypes import Subject, person #import d1_admin_tools as d1np -from . import defs +from . 
import defs, utils def init_client(cn_url: str, auth_token: str): """ @@ -64,6 +64,25 @@ def register_user(client: CoordinatingNodeClient, orcid: str, name: str, email: :param str name: The name of the subject :param str email: The subject's email address """ + L = logging.getLogger(__name__) + s = Subject(orcid) + p = person() + p.subject = s + given, family = utils.parse_name(name) + p.givenName = given + p.familyName = family + if email: + p.mail = email + try: + client.registerAccount(p) + except Exception as e: + try: + err_n = str(e).split('\n')[0] + err_c = str(e).split('\n')[1] + err_d = str(e).split('\n')[3] + print('Error processing %s (%s)\n%s\n%s\n%s' % (name, orcid, err_n, err_c, err_d)) + except: + print(e) def set_nodes_properties(nodes_properties: dict, con=None): """ diff --git a/mnonboard/info_chx.py b/mnonboard/info_chx.py index bae02569..8bc39972 100644 --- a/mnonboard/info_chx.py +++ b/mnonboard/info_chx.py @@ -4,6 +4,7 @@ from mnonboard.defs import FIELDS, SITEMAP_URLS, ORCID_PREFIX, SCHEDULES, NODE_ID_PREFIX, SUBJECT_PREFIX, SUBJECT_POSTFIX from mnonboard import default_json, L +from mnonboard.cn import init_client from opersist.utils import JSON_TIME_FORMAT, dtnow from opersist.cli import getOpersistInstance @@ -203,7 +204,7 @@ def enter_int(prompt): L.warning("Number of database sitemap URLs can't be less than 1. (%s entered)" % i) print('Please enter 1 or greater.') -def cn_subj_lookup(subj, cn_url='https://cn.dataone.org/cn', debug=False): +def cn_subj_lookup(subj, cn_url='https://cn.dataone.org/cn', debug=False, client: CoordinatingNodeClient=None): """ Use the DataONE API to look up whether a given ORCiD number already exists in the system. 
@@ -214,11 +215,9 @@ def cn_subj_lookup(subj, cn_url='https://cn.dataone.org/cn', debug=False): :returns: Received response or False :rtype: str or bool """ - # this authentication method was adapted from: - # https://github.com/DataONEorg/dataone_examples/blob/master/python_examples/update_object.ipynb - options = {"headers": {"Authorization": "Bearer %s" % (D1_AUTH_TOKEN)}} - # Create the Member Node Client - client = CoordinatingNodeClient(cn_url, **options) + if not client: + # Create the Member Node Client + client = init_client(cn_url=cn_url, auth_token=D1_AUTH_TOKEN) try: # Get records L.info('Starting record lookup for %s from %s' % (subj, cn_url)) @@ -227,6 +226,7 @@ def cn_subj_lookup(subj, cn_url='https://cn.dataone.org/cn', debug=False): name = '%s %s' % (r[1], r[2]) L.info('Name associated with record %s found in %s: %s.' % (subj, cn_url, name)) rt = name if not debug else r + client._session.close() return rt except exceptions.NotFound as e: estrip = str(e).split('')[1].split('')[0] @@ -283,7 +283,7 @@ def set_role(loc, title, value): op.close() L.info('OPersist record set.') -def orcid_name(orcid, f): +def orcid_info(orcid, f): """ Ask the user for the name of an orcid number. 
# Lower-case surname particles that mark where a family name begins
# (e.g. the "von" in "John von Schmidt").
_NAME_PARTICLES = frozenset({
    'del', 'van', 'de', 'von', 'der', 'di', 'la', 'le', 'da', 'el', 'al', 'bin',
})


def parse_name(fullname: str):
    """
    Parse full names into given and family designations.

    This function parses full names into given and family names. It supports
    various formats of names, including those with multiple given names and
    family names. Every name part is title-cased, except surname particles
    (``von``, ``de``, ``la`` ...) that precede the last word of the family
    name, which are kept lower-case.

    Supported formats:
        Multiple given names: ``John Jacob Jingleheimer Schmidt``
        Given name and family name: ``John Schmidt``
        Family name and given name: ``Schmidt, John``
        Given name and family name with prefix: ``John von Schmidt``
        Family name with several prefixes: ``Maria de la Cruz``

    A warning is logged whenever either part could not be determined (for
    example a single-word or empty name); the missing part is returned as
    ``None`` instead of raising.

    :param fullname: The full name to be parsed.
    :type fullname: str
    :return: A tuple ``(given, family)``; either element may be ``None``.
    :rtype: tuple[str, str]
    """
    given, family = None, None
    text = fullname or ''
    words = text.split()
    if ', ' in text:
        # "Family, Given": only the first two comma-separated parts are used
        parts = [part.strip() for part in text.title().split(', ')[:2]]
        family, given = parts[0] or None, parts[1] or None
    elif words:
        # The family name starts at the first particle that has at least one
        # word on each side; without a particle it is just the last word.
        split_at = next(
            (i for i, word in enumerate(words[:-1])
             if i > 0 and word.lower() in _NAME_PARTICLES),
            len(words) - 1,
        )
        given = ' '.join(word.title() for word in words[:split_at]) or None
        last = len(words) - 1
        # Particles stay lower-case unless they are the final word, so a
        # surname such as "Le" or "Da" is still capitalised.
        family = ' '.join(
            word.lower() if (i < last and word.lower() in _NAME_PARTICLES)
            else word.title()
            for i, word in enumerate(words[split_at:], start=split_at)
        )
    if (not given) or (not family):
        L = getLogger(__name__)
        L.warning(f'Could not parse name "{fullname}". Result of given name: "{given}" Family name: "{family}"')
    return given, family
@@ -146,12 +194,13 @@ def get_or_create_subj(loc: str, value: str, cn_url: str, title: str='unspecifie L.info(f'Node subject value: "{value}"') else: # name was not given. look up the orcid record in the database - name = cn_subj_lookup(subj=value, cn_url=cn_url) + name = cn_subj_lookup(subj=value, client=client) if not name: # if the name is not in either database, we will create it; else it's already there and we ignore it - L.info('%s does not exist at %s. Need a name for local record creation...' % (value, cn_url)) + L.info('%s does not exist at %s. Need a name for local record creation...' % (value, client.base_url)) # ask the user for a name with the associated position and ORCiD record - name = orcid_name(value, title) + name, email = orcid_info(value, title) + register_user(client=client, orcid=value, name=name, email=email) # finally, use opersist to create the subject local_subj_lookup(loc=loc, subj=value, name=name) # then use opersist to set the subject's role