Skip to content

Commit

Permalink
[WIP](mtd): several TODOs
Browse files Browse the repository at this point in the history
...
  • Loading branch information
VincentCauchois committed Nov 4, 2024
1 parent cd58ab7 commit b3daed0
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 0 deletions.
7 changes: 7 additions & 0 deletions backend/geonature/core/gn_meta/mtd/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
class MTDInstanceApi:
af_path = "/mtd/cadre/export/xml/GetRecordsByInstanceId?id={ID_INSTANCE}"
ds_path = "/mtd/cadre/jdd/export/xml/GetRecordsByInstanceId?id={ID_INSTANCE}"
# TODO: check if there are endpoints to retrieve metadata for a given user and instance, and not only a given user and whatever instance
ds_user_path = "/mtd/cadre/jdd/export/xml/GetRecordsByUserId?id={ID_ROLE}"
af_user_path = "/mtd/cadre/export/xml/GetRecordsByUserId?id={ID_ROLE}"
single_af_path = "/mtd/cadre/export/xml/GetRecordById?id={ID_AF}" # NOTE: `ID_AF` is actually an UUID and not an ID from the point of view of geonature database.
Expand All @@ -60,6 +61,9 @@ def _get_xml(self, path):
def _get_af_xml(self):
return self._get_xml(self.af_path)

# TODO: make the functions `get_af_list` and `get_ds_list` homogeneous
# - Use functions `parse_acquisition_framworks_xml`, and `parse_datasets_xml`, OR `parse_acquisition_framework`, and `parse_dataset`

def get_af_list(self) -> list:
xml = self._get_af_xml()
_xml_parser = etree.XMLParser(ns_clean=True, recover=True, encoding="utf-8")
Expand Down Expand Up @@ -206,6 +210,8 @@ def process_af_and_ds(af_list, ds_list, id_role=None):
add_unexisting_digitizer(af["id_digitizer"] if not id_role else id_role)
user_add_total_time += time.time() - start_add_user_time
af = sync_af(af)
# TODO: choose whether or not to commit retrieval of the AF before association of actors
# and possibly retrieve an AF without any actor associated to it
# Commit here to retrieve the AF even if the association of actors that follows is to fail
db.session.commit()
# If the AF has not been retrieved, associated actors cannot be retrieved either
Expand All @@ -219,6 +225,7 @@ def process_af_and_ds(af_list, ds_list, id_role=None):
af.id_acquisition_framework,
af.unique_acquisition_framework_id,
)
# TODO: check the following TODO:
# TODO: remove actors removed from MTD
db.session.commit()
logger.debug("MTD - PROCESS DS LIST")
Expand Down
93 changes: 93 additions & 0 deletions backend/geonature/core/gn_meta/mtd/mtd_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,9 @@ def sync_af(af):
# Handle case where af_uuid is None, as it would raise an error at database level when executing the statement below.
# None value for `af_uuid`, i.e. af UUID is missing, could be due to no UUID specified in `<ca:identifiantCadre/>` tag in the XML file.
# If so, we skip the retrieval of the AF.
# TODO: choose between the two following alternatives:
# - (RETAINED) Just skipp the retrieval of the AF
# - Generate a UUID for the AF
if not af_uuid:
logger.warning(
f"No UUID provided for the AF with UUID '{af_uuid}' and name '{name_af}' - SKIPPING SYNCHRONIZATION FOR THIS AF."
Expand Down Expand Up @@ -247,6 +250,8 @@ def associate_actors(
uuid_organism = actor["uuid_organism"]
organism_name = actor["organism"]
email_actor = actor["email"]
# TODO: choose whether to add or update an organism with no UUID specified
# - add or update it using organism name only - field `organism`
if uuid_organism:
with DB.session.begin_nested():
# create or update organisme
Expand All @@ -261,6 +266,7 @@ def associate_actors(
# /!\ Handle case where there is also an organism with the name equals to the value of `name_organism`
# - check if there already is an organism with the name `organism_name`
# - if there is one:
# - TODO: choose wether to update the organism with the values `organism_name` and `email_actor` or not
# - set `id_organism` with the ID of the existing organism
# - if there is not:
# - set `id_organism` with the ID of a newly created organism
Expand All @@ -269,6 +275,18 @@ def associate_actors(
exists().where(BibOrganismes.nom_organisme == organism_name).select()
)
if is_exists_organism:
# TODO: choose whether to keep the following section - update of the organism with the values `organism_name` and `email_actor` ?
# --- If yes: uncomment the two following statements
# uuid_organism = DB.session.scalar(
# select(BibOrganismes.uuid_organisme)
# .where(BibOrganismes.nom_organisme == organism_name)
# .limit(1)
# )
# id_organism = add_or_update_organism(
# uuid=uuid_organism,
# nom=organism_name,
# email=email_actor,
# )
id_organism = DB.session.scalar(
select(BibOrganismes.id_organisme)
.where(BibOrganismes.nom_organisme == organism_name)
Expand All @@ -292,8 +310,17 @@ def associate_actors(
id_nomenclature_actor_role=id_nomenclature_actor_role,
**{pk_name: pk_value},
)
# TODO: choose wether to:
# - (RETAINED) Try to associate to an organism first and then to a user
# - Try to associate to a user first and then to an organism
# Try to associate to an organism first, and if that is impossible, to a user
if id_organism:
values["id_organism"] = id_organism
# TODO: handle case where no user is retrieved for the actor email:
# - (retained) If the actor role is "Contact Principal" associate to a new user with only a UUID and an ID, else just do not try to associate the actor with the metadata
# - Try to retrieve an id_organism from the organism name - field `organism`
# - Try to retrieve an id_organism from the actor email considered as the organism email - field `email`
# - Try to insert a new user from the actor name - field `name` - and possibly also email - field `email`
else:
id_user_from_email = DB.session.scalar(
select(User.id_role).filter_by(email=email_actor).where(User.groupe.is_(False))
Expand All @@ -310,8 +337,73 @@ def associate_actors(
# in particular:
# - we do not specify field `email` even if `email_actor` is to be set
# - only the field `dec_role` will be written to a non-default value, so as to identify this particular "Contact principal"-for-orphan-metadata user
# TODO: FUNCTIONAL - verify:
# - whether to:
# - (alternative 0) systematically add a new user for each concerned metadata
# - (RETAINED FOR THE MOMENT) or rather use a single user for all metadata without retrieved "Contact Principal"
# - whether it is right to use only (strictly) negative values for those new users:
# - must ensure that we will not already have (strictly) negative values in the `utilisateurs.t_roles` table for concerned instances (GINCO, DEPOBIO, ...)
# - what are the limits for 'serial4' PostgreSQL type: minimum and maximum values, ...
# - alternatives:
# - using the nextval sequence several times until an unused value is found BUT possible future conflict with new entries from INPN users
# - take the first value superior to 0 and which is not already used BUT possible future conflict with new entries from INPN users
# - take a base value which is enough high to avoid conflicts with INPN users IDs
# - start from the maximum or minimum value allowed by 'serial4' PostgreSQL type
# - whether to add a new user or a new organism...
cd_nomenclature_actor_role_for_contact_principal = "1"
if cd_nomenclature_actor_role == cd_nomenclature_actor_role_for_contact_principal:
### Alternative 0 (see TODO above):
### UNCOMMENT the folowing section if this alternative is eventually chosen
# # Get an integer that is equals to 1 less than the minimum of `utilisateurs.id_role` field in database
# # to ensure that the new ID is not already used
# # - we cannot rely on the next value from the sequence `utilisateurs.t_roles.t_roles_id_role_seq` because we can have,
# # and actually have for GINCO and DEPOBIO instances, ID values higher than this next value: this is due
# # to the fact that entries are inserted in the table `utilisateurs.t_roles` specifying the ID rather than
# # using the nextval sequence.
# # - moreover, we cannot actually take a (strictly) positive integer value as we risk to take an ID that could
# # later be needed for the retrieval of a user from the INPN as users are retrieved from the INPN taking
# # values associated to them in the INPN, especially the ID, for the insert in `utilisateurs.t_roles`.
# # - in other words, the field `utilisateurs.t_roles.id_roles` is actually not localized to the GeoNature instance, at
# # least for instances integrated with the INPN such as GINCO and DEPOBIO instances.
# # We take a value that is at most equal to -1 so as to clearly identify those generated users as the users
# # with (strictly) negative IDs.
# min_id_role = DB.session.scalar(
# select(func.min(User.id_role))
# )
# if min_id_role == 1:
# min_id_role = -1
# if min_id_role:
# id_generated_user = min_id_role - 1
# else:
# id_generated_user = 1
### RETAINED FOR THE MOMENT (see TODO above):
# TODO:
# - i. TODO: Choose of ID for user created for metadata with no "Contact Principal" that could be retrieved - alternatives:
# - (RETAINED FOR THE MOMENT)(alternative 0.1) value 0 - assuming that 0 is never used amongst the different instances
# - (alternative 0.2) value -1 - assuming that -1 is never used amongst the different instances AND that it is a valid value for 'serial4'
# actually it should not be possible to have negative values for the 'serial4' PostgreSQL type,
# which should allow values ranging from 1 to 2147483647
# // documentation PG 16: https://www.postgresql.org/docs/current/datatype-numeric.html#DATATYPE-NUMERIC
# BUT a test creating a user with ID = -1 in a GN database has been done successfully...
# TODO: determiner why is it possible to have a value of -1, and other negative values (-2 has been tested also)
# - (alternative 0.3) value 2147483647 - maximum value for 'serial4' PostgreSQL type
# - (alternative 0.4) minimum negative value [to be determined]
# - ii. TODO: Choose whether to
# - add textual information to the created user
# - which information to provide:
# - (RETAINED FOR THE MOMENT) "Contact Principal for 'orphan' metadata - with no 'Contact Principal' that could be retrieved during MTD INPN synchronisation"
# - (alternatives) ...
# - which fields in `utilisateurs.t_roles` to be written:
# - (?) `identifiant`
# - (?) `nom_role`
# - (?) `prenom_role`
# - (?) `email`
# - (?) `id_organisme`
# - (RETAINED FOR THE MOMENT) `desc_role` (text)
# - (RETAINED FOR THE MOMENT) "Contact Principal for 'orphan' metadata - i.e. with no 'Contact Principal' that could be retrieved during INPN MTD synchronisation"
# - (??) `champs_addi`
# - (?) `remarques` (text)
# - (alternative) or not - the user will only have: an ID, a UUID
# Retrieve the "Contact principal"-for-orphan-metadata user
desc_role_for_user_contact_principal_for_orphan_metadata = "Contact principal for 'orphan' metadata - i.e. with no 'Contact Principal' that could be retrieved during INPN MTD synchronisation"
id_user_contact_principal_for_orphan_metadata = 0
Expand Down Expand Up @@ -341,6 +433,7 @@ def associate_actors(
dict_data_generated_user = insert_or_update_role(
data=dict_data_generated_user
)
# TODO: verify it is ok to commit here
# Commit to ensure that the insert from previous statement is actually committed
DB.session.commit()
values["id_role"] = id_user_contact_principal_for_orphan_metadata
Expand Down
11 changes: 11 additions & 0 deletions backend/geonature/core/gn_meta/mtd/xml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ def parse_actors_xml(actors):
return actor_list


# TODO: IMPORTANT - filter the list of acquisition frameworks with `ID_INSTANCE_FILTER` as made for the list of datasets

# TODO: make functions for AF and DS homogeneous: refactorize
# - Have functions `parse_acquisition_frameworks_xml`, and `parse_datasets_xml`, OR `parse_acquisition_framework`, and `parse_dataset`
# - Eventually split into distinct functions the XML parsing and the mapping of fields


def parse_acquisition_framwork_xml(xml):
"""
Parse an xml of AF from a string
Expand Down Expand Up @@ -153,6 +160,10 @@ def format_acquisition_framework_id_from_xml(provided_af_uuid) -> Union[str, Non
for jdd in root.findall(".//" + namespace + "JeuDeDonnees"):
# We extract all the required informations from the different tags of the XML file
jdd_uuid = get_tag_content(jdd, "identifiantJdd")
# TODO: handle case where value for the tag `<jdd:identifiantCadre>` in the XML file is not of the form `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
# Solutions - if in the form `http://oafs.fr/meta/ca/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` (has some entries for INPN MTD PREPROD and instance 'Thématique') :
# - (RETAINED) Format by keeping only the `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` part
# - Add a check further in the MTD sync to process only if ca_uuid is in the right format
ca_uuid = format_acquisition_framework_id_from_xml(get_tag_content(jdd, "identifiantCadre"))
dataset_name = get_tag_content(jdd, "libelle")
dataset_shortname = get_tag_content(jdd, "libelleCourt", default_value="")
Expand Down
2 changes: 2 additions & 0 deletions backend/geonature/core/gn_meta/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,14 @@

@routes.before_request
def synchronize_mtd():
# TODO: add `if request.method != "OPTIONS" and [...]` in the following condition
if request.endpoint in ["gn_meta.get_datasets", "gn_meta.get_acquisition_frameworks_list"]:
from flask_login import current_user

if current_user.is_authenticated:
params = request.json if request.is_json else request.args
try:
# TODO: trigger a user sync without id_af in case no id_af is provided
list_id_af = params.get("id_acquisition_frameworks", [])
for id_af in list_id_af:
sync_af_and_ds_by_user(id_role=current_user.id_role, id_af=id_af)
Expand Down

0 comments on commit b3daed0

Please sign in to comment.