diff --git a/backend/geonature/core/gn_meta/mtd/__init__.py b/backend/geonature/core/gn_meta/mtd/__init__.py
index 156888ef28..9fcf8960f4 100644
--- a/backend/geonature/core/gn_meta/mtd/__init__.py
+++ b/backend/geonature/core/gn_meta/mtd/__init__.py
@@ -36,6 +36,7 @@
class MTDInstanceApi:
af_path = "/mtd/cadre/export/xml/GetRecordsByInstanceId?id={ID_INSTANCE}"
ds_path = "/mtd/cadre/jdd/export/xml/GetRecordsByInstanceId?id={ID_INSTANCE}"
+ # TODO: check if there are endpoints to retrieve metadata for a given user and instance, and not only a given user and whatever instance
ds_user_path = "/mtd/cadre/jdd/export/xml/GetRecordsByUserId?id={ID_ROLE}"
af_user_path = "/mtd/cadre/export/xml/GetRecordsByUserId?id={ID_ROLE}"
single_af_path = "/mtd/cadre/export/xml/GetRecordById?id={ID_AF}" # NOTE: `ID_AF` is actually an UUID and not an ID from the point of view of geonature database.
@@ -60,6 +61,9 @@ def _get_xml(self, path):
def _get_af_xml(self):
return self._get_xml(self.af_path)
+ # TODO: make the functions `get_af_list` and `get_ds_list` homogeneous
+ # - Use functions `parse_acquisition_framworks_xml`, and `parse_datasets_xml`, OR `parse_acquisition_framework`, and `parse_dataset`
+
def get_af_list(self) -> list:
xml = self._get_af_xml()
_xml_parser = etree.XMLParser(ns_clean=True, recover=True, encoding="utf-8")
@@ -206,6 +210,8 @@ def process_af_and_ds(af_list, ds_list, id_role=None):
add_unexisting_digitizer(af["id_digitizer"] if not id_role else id_role)
user_add_total_time += time.time() - start_add_user_time
af = sync_af(af)
+ # TODO: choose whether or not to commit retrieval of the AF before association of actors
+ # and possibly retrieve an AF without any actor associated to it
# Commit here to retrieve the AF even if the association of actors that follows is to fail
db.session.commit()
# If the AF has not been retrieved, associated actors cannot be retrieved either
@@ -219,6 +225,7 @@ def process_af_and_ds(af_list, ds_list, id_role=None):
af.id_acquisition_framework,
af.unique_acquisition_framework_id,
)
+ # TODO: check the following TODO:
# TODO: remove actors removed from MTD
db.session.commit()
logger.debug("MTD - PROCESS DS LIST")
diff --git a/backend/geonature/core/gn_meta/mtd/mtd_utils.py b/backend/geonature/core/gn_meta/mtd/mtd_utils.py
index 80bc872c77..b754c7abc2 100644
--- a/backend/geonature/core/gn_meta/mtd/mtd_utils.py
+++ b/backend/geonature/core/gn_meta/mtd/mtd_utils.py
@@ -146,6 +146,9 @@ def sync_af(af):
# Handle case where af_uuid is None, as it would raise an error at database level when executing the statement below.
# None value for `af_uuid`, i.e. af UUID is missing, could be due to no UUID specified in `` tag in the XML file.
# If so, we skip the retrieval of the AF.
+ # TODO: choose between the two following alternatives:
+ # - (RETAINED) Just skipp the retrieval of the AF
+ # - Generate a UUID for the AF
if not af_uuid:
logger.warning(
f"No UUID provided for the AF with UUID '{af_uuid}' and name '{name_af}' - SKIPPING SYNCHRONIZATION FOR THIS AF."
@@ -247,6 +250,8 @@ def associate_actors(
uuid_organism = actor["uuid_organism"]
organism_name = actor["organism"]
email_actor = actor["email"]
+ # TODO: choose whether to add or update an organism with no UUID specified
+ # - add or update it using organism name only - field `organism`
if uuid_organism:
with DB.session.begin_nested():
# create or update organisme
@@ -261,6 +266,7 @@ def associate_actors(
# /!\ Handle case where there is also an organism with the name equals to the value of `name_organism`
# - check if there already is an organism with the name `organism_name`
# - if there is one:
+ # - TODO: choose wether to update the organism with the values `organism_name` and `email_actor` or not
# - set `id_organism` with the ID of the existing organism
# - if there is not:
# - set `id_organism` with the ID of a newly created organism
@@ -269,6 +275,18 @@ def associate_actors(
exists().where(BibOrganismes.nom_organisme == organism_name).select()
)
if is_exists_organism:
+ # TODO: choose whether to keep the following section - update of the organism with the values `organism_name` and `email_actor` ?
+ # --- If yes: uncomment the two following statements
+ # uuid_organism = DB.session.scalar(
+ # select(BibOrganismes.uuid_organisme)
+ # .where(BibOrganismes.nom_organisme == organism_name)
+ # .limit(1)
+ # )
+ # id_organism = add_or_update_organism(
+ # uuid=uuid_organism,
+ # nom=organism_name,
+ # email=email_actor,
+ # )
id_organism = DB.session.scalar(
select(BibOrganismes.id_organisme)
.where(BibOrganismes.nom_organisme == organism_name)
@@ -292,8 +310,17 @@ def associate_actors(
id_nomenclature_actor_role=id_nomenclature_actor_role,
**{pk_name: pk_value},
)
+ # TODO: choose wether to:
+ # - (RETAINED) Try to associate to an organism first and then to a user
+ # - Try to associate to a user first and then to an organism
+ # Try to associate to an organism first, and if that is impossible, to a user
if id_organism:
values["id_organism"] = id_organism
+ # TODO: handle case where no user is retrieved for the actor email:
+ # - (retained) If the actor role is "Contact Principal" associate to a new user with only a UUID and an ID, else just do not try to associate the actor with the metadata
+ # - Try to retrieve an id_organism from the organism name - field `organism`
+ # - Try to retrieve an id_organism from the actor email considered as the organism email - field `email`
+ # - Try to insert a new user from the actor name - field `name` - and possibly also email - field `email`
else:
id_user_from_email = DB.session.scalar(
select(User.id_role).filter_by(email=email_actor).where(User.groupe.is_(False))
@@ -310,8 +337,73 @@ def associate_actors(
# in particular:
# - we do not specify field `email` even if `email_actor` is to be set
# - only the field `dec_role` will be written to a non-default value, so as to identify this particular "Contact principal"-for-orphan-metadata user
+ # TODO: FUNCTIONAL - verify:
+ # - whether to:
+ # - (alternative 0) systematically add a new user for each concerned metadata
+ # - (RETAINED FOR THE MOMENT) or rather use a single user for all metadata without retrieved "Contact Principal"
+ # - whether it is right to use only (strictly) negative values for those new users:
+ # - must ensure that we will not already have (strictly) negative values in the `utilisateurs.t_roles` table for concerned instances (GINCO, DEPOBIO, ...)
+ # - what are the limits for 'serial4' PostgreSQL type: minimum and maximum values, ...
+ # - alternatives:
+ # - using the nextval sequence several times until an unused value is found BUT possible future conflict with new entries from INPN users
+ # - take the first value superior to 0 and which is not already used BUT possible future conflict with new entries from INPN users
+ # - take a base value which is enough high to avoid conflicts with INPN users IDs
+ # - start from the maximum or minimum value allowed by 'serial4' PostgreSQL type
+ # - whether to add a new user or a new organism...
cd_nomenclature_actor_role_for_contact_principal = "1"
if cd_nomenclature_actor_role == cd_nomenclature_actor_role_for_contact_principal:
+ ### Alternative 0 (see TODO above):
+ ### UNCOMMENT the folowing section if this alternative is eventually chosen
+ # # Get an integer that is equals to 1 less than the minimum of `utilisateurs.id_role` field in database
+ # # to ensure that the new ID is not already used
+ # # - we cannot rely on the next value from the sequence `utilisateurs.t_roles.t_roles_id_role_seq` because we can have,
+ # # and actually have for GINCO and DEPOBIO instances, ID values higher than this next value: this is due
+ # # to the fact that entries are inserted in the table `utilisateurs.t_roles` specifying the ID rather than
+ # # using the nextval sequence.
+ # # - moreover, we cannot actually take a (strictly) positive integer value as we risk to take an ID that could
+ # # later be needed for the retrieval of a user from the INPN as users are retrieved from the INPN taking
+ # # values associated to them in the INPN, especially the ID, for the insert in `utilisateurs.t_roles`.
+ # # - in other words, the field `utilisateurs.t_roles.id_roles` is actually not localized to the GeoNature instance, at
+ # # least for instances integrated with the INPN such as GINCO and DEPOBIO instances.
+ # # We take a value that is at most equal to -1 so as to clearly identify those generated users as the users
+ # # with (strictly) negative IDs.
+ # min_id_role = DB.session.scalar(
+ # select(func.min(User.id_role))
+ # )
+ # if min_id_role == 1:
+ # min_id_role = -1
+ # if min_id_role:
+ # id_generated_user = min_id_role - 1
+ # else:
+ # id_generated_user = 1
+ ### RETAINED FOR THE MOMENT (see TODO above):
+ # TODO:
+ # - i. TODO: Choose of ID for user created for metadata with no "Contact Principal" that could be retrieved - alternatives:
+ # - (RETAINED FOR THE MOMENT)(alternative 0.1) value 0 - assuming that 0 is never used amongst the different instances
+ # - (alternative 0.2) value -1 - assuming that -1 is never used amongst the different instances AND that it is a valid value for 'serial4'
+ # actually it should not be possible to have negative values for the 'serial4' PostgreSQL type,
+ # which should allow values ranging from 1 to 2147483647
+ # // documentation PG 16: https://www.postgresql.org/docs/current/datatype-numeric.html#DATATYPE-NUMERIC
+ # BUT a test creating a user with ID = -1 in a GN database has been done successfully...
+ # TODO: determiner why is it possible to have a value of -1, and other negative values (-2 has been tested also)
+ # - (alternative 0.3) value 2147483647 - maximum value for 'serial4' PostgreSQL type
+ # - (alternative 0.4) minimum negative value [to be determined]
+ # - ii. TODO: Choose whether to
+ # - add textual information to the created user
+ # - which information to provide:
+ # - (RETAINED FOR THE MOMENT) "Contact Principal for 'orphan' metadata - with no 'Contact Principal' that could be retrieved during MTD INPN synchronisation"
+ # - (alternatives) ...
+ # - which fields in `utilisateurs.t_roles` to be written:
+ # - (?) `identifiant`
+ # - (?) `nom_role`
+ # - (?) `prenom_role`
+ # - (?) `email`
+ # - (?) `id_organisme`
+ # - (RETAINED FOR THE MOMENT) `desc_role` (text)
+ # - (RETAINED FOR THE MOMENT) "Contact Principal for 'orphan' metadata - i.e. with no 'Contact Principal' that could be retrieved during INPN MTD synchronisation"
+ # - (??) `champs_addi`
+ # - (?) `remarques` (text)
+ # - (alternative) or not - the user will only have: an ID, a UUID
# Retrieve the "Contact principal"-for-orphan-metadata user
desc_role_for_user_contact_principal_for_orphan_metadata = "Contact principal for 'orphan' metadata - i.e. with no 'Contact Principal' that could be retrieved during INPN MTD synchronisation"
id_user_contact_principal_for_orphan_metadata = 0
@@ -341,6 +433,7 @@ def associate_actors(
dict_data_generated_user = insert_or_update_role(
data=dict_data_generated_user
)
+ # TODO: verify it is ok to commit here
# Commit to ensure that the insert from previous statement is actually committed
DB.session.commit()
values["id_role"] = id_user_contact_principal_for_orphan_metadata
diff --git a/backend/geonature/core/gn_meta/mtd/xml_parser.py b/backend/geonature/core/gn_meta/mtd/xml_parser.py
index cc1db341c4..673e52adbd 100644
--- a/backend/geonature/core/gn_meta/mtd/xml_parser.py
+++ b/backend/geonature/core/gn_meta/mtd/xml_parser.py
@@ -62,6 +62,13 @@ def parse_actors_xml(actors):
return actor_list
+# TODO: IMPORTANT - filter the list of acquisition frameworks with `ID_INSTANCE_FILTER` as made for the list of datasets
+
+# TODO: make functions for AF and DS homogeneous: refactorize
+# - Have functions `parse_acquisition_frameworks_xml`, and `parse_datasets_xml`, OR `parse_acquisition_framework`, and `parse_dataset`
+# - Eventually split into distinct functions the XML parsing and the mapping of fields
+
+
def parse_acquisition_framwork_xml(xml):
"""
Parse an xml of AF from a string
@@ -153,6 +160,10 @@ def format_acquisition_framework_id_from_xml(provided_af_uuid) -> Union[str, Non
for jdd in root.findall(".//" + namespace + "JeuDeDonnees"):
# We extract all the required informations from the different tags of the XML file
jdd_uuid = get_tag_content(jdd, "identifiantJdd")
+ # TODO: handle case where value for the tag `` in the XML file is not of the form `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
+ # Solutions - if in the form `http://oafs.fr/meta/ca/xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` (has some entries for INPN MTD PREPROD and instance 'Thématique') :
+ # - (RETAINED) Format by keeping only the `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx` part
+ # - Add a check further in the MTD sync to process only if ca_uuid is in the right format
ca_uuid = format_acquisition_framework_id_from_xml(get_tag_content(jdd, "identifiantCadre"))
dataset_name = get_tag_content(jdd, "libelle")
dataset_shortname = get_tag_content(jdd, "libelleCourt", default_value="")
diff --git a/backend/geonature/core/gn_meta/routes.py b/backend/geonature/core/gn_meta/routes.py
index 5bb91bbee0..15af2e15ae 100644
--- a/backend/geonature/core/gn_meta/routes.py
+++ b/backend/geonature/core/gn_meta/routes.py
@@ -79,12 +79,14 @@
@routes.before_request
def synchronize_mtd():
+ # TODO: add `if request.method != "OPTIONS" and [...]` in the following condition
if request.endpoint in ["gn_meta.get_datasets", "gn_meta.get_acquisition_frameworks_list"]:
from flask_login import current_user
if current_user.is_authenticated:
params = request.json if request.is_json else request.args
try:
+ # TODO: trigger a user sync without id_af in case no id_af is provided
list_id_af = params.get("id_acquisition_frameworks", [])
for id_af in list_id_af:
sync_af_and_ds_by_user(id_role=current_user.id_role, id_af=id_af)