Skip to content

Commit

Permalink
adding some logic to address #26
Browse files Browse the repository at this point in the history
  • Loading branch information
iannesbitt committed Aug 27, 2024
1 parent fc53071 commit af2116e
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 17 deletions.
4 changes: 2 additions & 2 deletions mnonboard/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def run(cfg):
cfg['token'] = info_chx.req_input('Please enter your DataONE authentication token: ')
os.environ['D1_AUTH_TOKEN'] = cfg['token']
cfg['cert_loc'] = CN_CERT_LOC[cfg['mode']]
DC = cn.init_client(cn_url=cfg['cn_url'], auth_token=cfg['token'])
client = cn.init_client(cn_url=cfg['cn_url'], auth_token=cfg['token'])
if cfg['info'] == 'user':
# do the full user-driven info gathering process
ufields = info_chx.user_input()
Expand All @@ -45,7 +45,7 @@ def run(cfg):
# add a subject for owner and submitter (may not be necessary if they exist already)
# add subject for technical contact (step 6)
val = fields[f] if f not in 'contact_subject' else fields['node'][f]
name = utils.get_or_create_subj(loc=loc, value=val, cn_url=cfg['cn_url'], title=f)
name = utils.get_or_create_subj(loc=loc, value=val, client=client, title=f)
# store this for a few steps later
names[val] = name
# set the update schedule and set the state to up
Expand Down
23 changes: 21 additions & 2 deletions mnonboard/cn.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from os import environ
import logging
from d1_client.cnclient import CoordinatingNodeClient
from d1_common.types.dataoneTypes import Subject, person
#import d1_admin_tools as d1np

from . import defs
from . import defs, utils

def init_client(cn_url: str, auth_token: str):
"""
Expand Down Expand Up @@ -64,6 +64,25 @@ def register_user(client: CoordinatingNodeClient, orcid: str, name: str, email:
:param str name: The name of the subject
:param str email: The subject's email address
"""
L = logging.getLogger(__name__)
s = Subject(orcid)
p = person()
p.subject = s
given, family = utils.parse_name(name)
p.givenName = given
p.familyName = family
if email:
p.mail = email
try:
client.registerAccount(p)
except Exception as e:
try:
err_n = str(e).split('\n')[0]
err_c = str(e).split('\n')[1]
err_d = str(e).split('\n')[3]
print('Error processing %s (%s)\n%s\n%s\n%s' % (name, orcid, err_n, err_c, err_d))
except:
print(e)

def set_nodes_properties(nodes_properties: dict, con=None):
"""
Expand Down
19 changes: 11 additions & 8 deletions mnonboard/info_chx.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from mnonboard.defs import FIELDS, SITEMAP_URLS, ORCID_PREFIX, SCHEDULES, NODE_ID_PREFIX, SUBJECT_PREFIX, SUBJECT_POSTFIX
from mnonboard import default_json, L
from mnonboard.cn import init_client
from opersist.utils import JSON_TIME_FORMAT, dtnow
from opersist.cli import getOpersistInstance

Expand Down Expand Up @@ -203,7 +204,7 @@ def enter_int(prompt):
L.warning("Number of database sitemap URLs can't be less than 1. (%s entered)" % i)
print('Please enter 1 or greater.')

def cn_subj_lookup(subj, cn_url='https://cn.dataone.org/cn', debug=False):
def cn_subj_lookup(subj, cn_url='https://cn.dataone.org/cn', debug=False, client: CoordinatingNodeClient=None):
"""
Use the DataONE API to look up whether a given ORCiD number already exists
in the system.
Expand All @@ -214,11 +215,9 @@ def cn_subj_lookup(subj, cn_url='https://cn.dataone.org/cn', debug=False):
:returns: Received response or False
:rtype: str or bool
"""
# this authentication method was adapted from:
# https://github.com/DataONEorg/dataone_examples/blob/master/python_examples/update_object.ipynb
options = {"headers": {"Authorization": "Bearer %s" % (D1_AUTH_TOKEN)}}
# Create the Member Node Client
client = CoordinatingNodeClient(cn_url, **options)
if not client:
# Create the Coordinating Node client
client = init_client(cn_url=cn_url, auth_token=D1_AUTH_TOKEN)
try:
# Get records
L.info('Starting record lookup for %s from %s' % (subj, cn_url))
Expand All @@ -227,6 +226,7 @@ def cn_subj_lookup(subj, cn_url='https://cn.dataone.org/cn', debug=False):
name = '%s %s' % (r[1], r[2])
L.info('Name associated with record %s found in %s: %s.' % (subj, cn_url, name))
rt = name if not debug else r
client._session.close()
return rt
except exceptions.NotFound as e:
estrip = str(e).split('<description>')[1].split('</description>')[0]
Expand Down Expand Up @@ -283,7 +283,7 @@ def set_role(loc, title, value):
op.close()
L.info('OPersist record set.')

def orcid_name(orcid, f):
def orcid_info(orcid, f):
"""
Ask the user for the name and (optionally) the email address associated with an ORCiD number.
Expand All @@ -295,7 +295,10 @@ def orcid_name(orcid, f):
L.info('Asking for name of %s (ORCiD number %s)' % (f, orcid))
name = req_input('Please enter the name of %s (ORCiD number %s): ' % (f, orcid))
L.info('User has entered "%s"' % name)
return name
email = input('If the subject has an email address, enter it here (leave blank to skip): ')
L.info('User has entered "%s"' % email)
email = email if (email and ('@' in email)) else None
return name, email

def enter_orcid(prompt):
"""
Expand Down
59 changes: 54 additions & 5 deletions mnonboard/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
import urllib.parse as urlparse
import xmltodict
from pathlib import Path
from logging import getLogger

from mnonboard.defs import SCHEDULES, NAMES_DICT, SUBJECT_PREFIX, SUBJECT_POSTFIX, USER_NAME
from mnonboard import NODE_PATH_REL, CUR_PATH_ABS, LOG_DIR, HARVEST_LOG_NAME, HM_DATE, L
from mnonboard.info_chx import cn_subj_lookup, local_subj_lookup, enter_schedule, orcid_name, set_role
from mnonboard.info_chx import cn_subj_lookup, local_subj_lookup, enter_schedule, orcid_info, set_role
from mnonboard.cn import register_user, init_client, CoordinatingNodeClient

def load_json(loc: str):
"""
Expand Down Expand Up @@ -103,6 +105,52 @@ def init_repo(loc: str):
L.error('opersist init command failed (node folder: %s): %s' % (loc, e))
exit(1)

def parse_name(fullname: str):
    """
    Parse a full name into given and family designations.

    Names are title-cased on output. Family-name particles (``von``, ``de``,
    ``van``, ...) are kept lower-case and are separated from the rest of the
    family name by a single space.

    Supported formats:

    - Multiple given names: ``John Jacob Jingleheimer Schmidt``
      -> ``('John Jacob Jingleheimer', 'Schmidt')``
    - Given name and family name: ``John Schmidt`` -> ``('John', 'Schmidt')``
    - Family name and given name: ``Schmidt, John`` -> ``('John', 'Schmidt')``
    - Family name with particle(s): ``John von Schmidt``
      -> ``('John', 'von Schmidt')``; ``Maria de la Cruz``
      -> ``('Maria', 'de la Cruz')``

    A warning is logged when either part could not be determined (for
    example a single-word or empty name); in that case the missing part is
    returned as ``None`` (or an empty string for ``"Family, "`` style input).

    :param fullname: The full name to be parsed.
    :type fullname: str
    :return: A tuple containing the given name and family name.
    :rtype: tuple[str, str]
    """
    # particles that mark the start of a family name when they appear as an
    # interior word of the full name
    particles = ('del', 'van', 'de', 'von', 'der', 'di', 'la', 'le', 'da', 'el', 'al', 'bin')
    given, family = None, None
    cleaned = fullname.strip() if fullname else ''
    if ',' in cleaned:
        # "Family, Given[, ...]": only the first two comma-separated parts are used
        parts = [part.strip() for part in cleaned.title().split(',')]
        family, given = parts[0], parts[1]
    else:
        words = cleaned.split()
        # the earliest interior particle wins, so "Maria de la Cruz" splits at
        # "de" rather than being overwritten by a later match on "la"
        split_at = next(
            (i for i, word in enumerate(words[1:-1], start=1) if word.lower() in particles),
            None,
        )
        if split_at is not None:
            given = ' '.join(words[:split_at]).title()
            # keep every particle lower-case, title-case the remaining words
            family = ' '.join(
                word.lower() if word.lower() in particles else word.title()
                for word in words[split_at:]
            )
        elif words:
            titled = [word.title() for word in words]
            # the last word is the family name; everything before it is given name(s)
            family = titled[-1]
            if len(titled) >= 2:
                given = ' '.join(titled[:-1])
    if (not given) or (not family):
        L = getLogger(__name__)
        L.warning(f'Could not parse name "{fullname}". Result of given name: "{given}" Family name: "{family}"')
    return given, family

def new_subj(loc: str, name: str, value: str):
"""
Create new subject in the database using opersist.
Expand All @@ -123,7 +171,7 @@ def new_subj(loc: str, name: str, value: str):
L.error('opersist subject creation command failed for %s (%s): %s' % (name, value, e))
exit(1)

def get_or_create_subj(loc: str, value: str, cn_url: str, title: str='unspecified subject', name: str=None):
def get_or_create_subj(loc: str, value: str, client: CoordinatingNodeClient, title: str='unspecified subject', name: str=None):
"""
Get an existing subject using their ORCiD or create a new one with the
specified values.
Expand All @@ -146,12 +194,13 @@ def get_or_create_subj(loc: str, value: str, cn_url: str, title: str='unspecifie
L.info(f'Node subject value: "{value}"')
else:
# name was not given. look up the orcid record in the database
name = cn_subj_lookup(subj=value, cn_url=cn_url)
name = cn_subj_lookup(subj=value, client=client)
if not name:
# if the name is not in either database, we will create it; else it's already there and we ignore it
L.info('%s does not exist at %s. Need a name for local record creation...' % (value, cn_url))
L.info('%s does not exist at %s. Need a name for local record creation...' % (value, client.base_url))
# ask the user for a name with the associated position and ORCiD record
name = orcid_name(value, title)
name, email = orcid_info(value, title)
register_user(client=client, orcid=value, name=name, email=email)
# finally, use opersist to create the subject
local_subj_lookup(loc=loc, subj=value, name=name)
# then use opersist to set the subject's role
Expand Down

0 comments on commit af2116e

Please sign in to comment.