diff --git a/pyproject.toml b/pyproject.toml index 6b5a79fa3..74148566f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ select = ["E", "F", "I", "N", "UP", "W"] [tool.pyright] include = [ "python/tests", + "python/lib/bids", "python/lib/db", "python/lib/exception", "python/lib/config_file.py", @@ -23,6 +24,7 @@ include = [ "python/lib/get_subject_session.py", "python/lib/logging.py", "python/lib/make_env.py", + "python/lib/util.py", "python/lib/validate_subject_info.py", ] typeCheckingMode = "strict" diff --git a/python/lib/bids/candidate.py b/python/lib/bids/candidate.py new file mode 100644 index 000000000..819ca342f --- /dev/null +++ b/python/lib/bids/candidate.py @@ -0,0 +1,57 @@ +import sys +from typing import Optional + +from bids.layout import BIDSLayout +from sqlalchemy.orm import Session as Database + +from lib.db.models.candidate import DbCandidate +from lib.db.queries.candidate import try_get_candidate_with_cand_id, try_get_candidate_with_psc_id +from lib.util import filter_map, try_parse_int + + +def get_bids_candidates(db: Database, bids_layout: BIDSLayout) -> list[DbCandidate]: + """ + Get all the candidates of a BIDS dataset from the database, using the BIDS subject labels. + """ + + # Get the subject labels of the BIDS dataset. + bids_subject_labels: list[str] = bids_layout.get_subjects() # type: ignore + + # Return the candidates found for each subject label. + return list(filter_map( + lambda bids_subject_label: get_bids_candidate(db, bids_subject_label), + bids_subject_labels, + )) + + +def get_bids_candidate(db: Database, bids_subject_label: str) -> Optional[DbCandidate]: + """ + Get a candidate from the database using a BIDS subject label. + """ + + # Check if the BIDS subject label looks like it might be a CandID. + cand_id = try_parse_int(bids_subject_label) + + # If the BIDS subject label might be a CandID, try to get the candidate using it as a CandID. 
+ if cand_id is not None: + candidate = try_get_candidate_with_cand_id(db, cand_id) + if candidate is not None: + return candidate + + # Try to get the candidate using the BIDS subject label as a PSCID. + candidate = try_get_candidate_with_psc_id(db, bids_subject_label) + if candidate is not None: + return candidate + + # All the candidates of the BIDS dataset should have been in the database at this stage. Print + # a warning if no candidate was found. + print( + ( + f"WARNING: No candidate found for BIDS subject label '{bids_subject_label}'," + " candidate omitted from the participants file" + ), + file=sys.stderr, + ) + + # Return `None` if no candidate is found. + return None diff --git a/python/lib/bids/participant.py b/python/lib/bids/participant.py new file mode 100644 index 000000000..216ebd6f0 --- /dev/null +++ b/python/lib/bids/participant.py @@ -0,0 +1,93 @@ +from dataclasses import dataclass + +import dateutil.parser +from bids import BIDSLayout + +import lib.utilities as utilities +from lib.db.models.candidate import DbCandidate + + +@dataclass +class BidsParticipant: + """ + Information about a BIDS participant represented in an entry in the `participants.tsv` file of + a BIDS dataset. + """ + + id: str + birth_date: str | None = None + sex: str | None = None + age: str | None = None + site: str | None = None + cohort: str | None = None + project: str | None = None + # FIXME: Both "cohort" and "subproject" are used in scripts, this may be a bug. + subproject: str | None = None + + +def read_bids_participants_file(bids_layout: BIDSLayout) -> list[BidsParticipant] | None: + """ + Find, read and parse the `participants.tsv` file of a BIDS dataset. Return the BIDS participant + entries if a file is found, or `None` otherwise. + """ + + # Find the `participants.tsv` file in the BIDS dataset. 
+ bids_participants_file_path = None + for bids_file_path in bids_layout.get(suffix='participants', return_type='filename'): # type: ignore + if 'participants.tsv' in bids_file_path: + bids_participants_file_path = bids_file_path # type: ignore + break + + # If no `participants.tsv` file is found, return `None`. + if bids_participants_file_path is None: + return None + + # Parse the BIDS participant entries from the `participants.tsv` file. + bids_participant_rows = utilities.read_tsv_file(bids_participants_file_path) # type: ignore + return list(map(read_bids_participant_row, bids_participant_rows)) # type: ignore + + +def read_bids_participant_row(row: dict[str, str]) -> BidsParticipant: + """ + Get a BIDS participant entry from a parsed TSV line of a `participants.tsv` file. + """ + + # Get the participant ID by removing the `sub-` prefix if it is present. + participant_id = row['participant_id'].replace('sub-', '') + + # Get the participant date of birth from one of the possible date of birth fields. + birth_date = None + for birth_date_name in ['date_of_birth', 'birth_date', 'dob']: + if birth_date_name in row: + birth_date = dateutil.parser.parse(row[birth_date_name]).strftime('%Y-%m-%d') + break + + # Create the BIDS participant object. + return BidsParticipant( + id = participant_id, + birth_date = birth_date, + sex = row.get('sex'), + age = row.get('age'), + site = row.get('site'), + cohort = row.get('cohort'), + project = row.get('project'), + subproject = row.get('subproject'), + ) + + +def get_bids_participant_from_candidate(candidate: DbCandidate) -> BidsParticipant: + """ + Generate a BIDS participant entry from a database candidate. + """ + + # Stringify the candidate date of birth if there is one. + birth_date = candidate.date_of_birth.strftime('%Y-%m-%d') if candidate.date_of_birth is not None else None + + # Create the BIDS participant object corresponding to the database candidate. 
+ return BidsParticipant( + id = candidate.psc_id, + birth_date = birth_date, + sex = candidate.sex, + site = candidate.registration_site.name, + project = candidate.registration_project.name, + ) diff --git a/python/lib/bidsreader.py b/python/lib/bidsreader.py index f63105186..e00ef1bce 100755 --- a/python/lib/bidsreader.py +++ b/python/lib/bidsreader.py @@ -3,11 +3,13 @@ import json import re import sys +from collections.abc import Generator +from dataclasses import dataclass from bids import BIDSLayout import lib.exitcode -import lib.utilities as utilities +from lib.bids.participant import BidsParticipant, read_bids_participants_file # import bids # BIDSLayoutIndexer is required for PyBIDS >= 0.12.1 @@ -21,6 +23,18 @@ __license__ = "GPLv3" +@dataclass +class BidsSessionInfo: + """ + Information about a BIDS session, that is, the label of the subject and the session, and the + modalities of this session. + """ + + subject_label: str + session_label: str | None + modalities: list[str] + + class BidsReader: """ This class reads a BIDS structure into a data dictionary using BIDS grabbids. @@ -35,16 +49,13 @@ class BidsReader: bids_reader = BidsReader(bids_dir) """ - def __init__(self, bids_dir, verbose, validate = True): + def __init__(self, bids_dir: str, verbose: bool, validate: bool = True): """ Constructor method for the BidsReader class. 
- :param bids_dir: path to the BIDS structure to read - :type bids_dir: str - :param verbose : boolean to print verbose information - :type verbose : bool + :param bids_dir : path to the BIDS structure to read + :param verbose : boolean to print verbose information :param validate : boolean to validate the BIDS dataset - :type validate : bool """ self.verbose = verbose @@ -65,7 +76,7 @@ def __init__(self, bids_dir, verbose, validate = True): print("WARNING: Cannot read dataset_description.json") # load BIDS candidates information - self.participants_info = self.load_candidates_from_bids() + self.bids_participants = self.load_candidates_from_bids() # load BIDS sessions information self.cand_sessions_list = self.load_sessions_from_bids() @@ -73,7 +84,7 @@ def __init__(self, bids_dir, verbose, validate = True): # load BIDS modality information self.cand_session_modalities_list = self.load_modalities_from_bids() - def load_bids_data(self, validate): + def load_bids_data(self, validate: bool): """ Loads the BIDS study using the BIDSLayout function (part of the pybids package) and return the object. @@ -84,7 +95,7 @@ def load_bids_data(self, validate): if self.verbose: print('Loading the BIDS dataset with BIDS layout library...\n') - exclude_arr = ['/code/', '/sourcedata/', '/log/', '.git/'] + exclude_arr = ['code', 'sourcedata', 'log', '.git'] force_arr = [re.compile(r"_annotations\.(tsv|json)$")] # BIDSLayoutIndexer is required for PyBIDS >= 0.12.1 @@ -114,42 +125,34 @@ def load_bids_data(self, validate): return bids_layout - def load_candidates_from_bids(self): + def load_candidates_from_bids(self) -> list[BidsParticipant]: """ Loads the list of candidates from the BIDS study. List of - participants and their information will be stored in participants_info. + participants and their information will be stored in bids_participants. 
:return: list of dictionaries with participant information from BIDS - :rtype: list """ if self.verbose: print('Grepping candidates from the BIDS layout...') - # grep the participant.tsv file and parse it - participants_info = None - for file in self.bids_layout.get(suffix='participants', return_type='filename'): - # note file[0] returns the path to participants.tsv - if 'participants.tsv' in file: - participants_info = utilities.read_tsv_file(file) - else: - continue + bids_participants = read_bids_participants_file(self.bids_layout) - if participants_info: - self.candidates_list_validation(participants_info) + if bids_participants: + self.candidates_list_validation(bids_participants) else: bids_subjects = self.bids_layout.get_subjects() - participants_info = [{'participant_id': sub_id} for sub_id in bids_subjects] + bids_participants = [BidsParticipant(sub_id) for sub_id in bids_subjects] if self.verbose: print('\t=> List of participants found:') - for participant in participants_info: - print('\t\t' + participant['participant_id']) + for bids_participant in bids_participants: + print('\t\t' + bids_participant.id) print('\n') - return participants_info + return bids_participants - def candidates_list_validation(self, participants_info): + def candidates_list_validation(self, bids_participants: list[BidsParticipant]): """ Validates whether the subjects listed in participants.tsv match the list of participant directory. 
If there is a mismatch, will exit with @@ -165,18 +168,16 @@ def candidates_list_validation(self, participants_info): "participants.tsv and raw data found in the BIDS " "directory") - # check that all subjects listed in participants_info are also in + # check that all subjects listed in bids_participants are also in # subjects array and vice versa - for row in participants_info: - # remove the "sub-" in front of the subject ID if present - row['participant_id'] = row['participant_id'].replace('sub-', '') - if row['participant_id'] not in subjects: + for bids_participant in bids_participants: + if bids_participant.id not in subjects: print(mismatch_message) - print(row['participant_id'] + 'is missing from the BIDS Layout') + print(bids_participant.id + ' is missing from the BIDS Layout') print('List of subjects parsed by the BIDS layout: ' + ', '.join(subjects)) sys.exit(lib.exitcode.BIDS_CANDIDATE_MISMATCH) # remove the subject from the list of subjects - subjects.remove(row['participant_id']) + subjects.remove(bids_participant.id) # check that no subjects are left in subjects array if subjects: @@ -186,14 +187,13 @@ def candidates_list_validation(self, participants_info): if self.verbose: print('\t=> Passed validation of the list of participants\n') - def load_sessions_from_bids(self): + def load_sessions_from_bids(self) -> dict[str, list[str]]: """ Grep the list of sessions for each candidate directly from the BIDS structure. 
:return: dictionary with the list of sessions and candidates found in the BIDS structure - :rtype: dict """ if self.verbose: @@ -201,9 +201,9 @@ def load_sessions_from_bids(self): cand_sessions = {} - for row in self.participants_info: - ses = self.bids_layout.get_sessions(subject=row['participant_id']) - cand_sessions[row['participant_id']] = ses + for bids_participant in self.bids_participants: + ses = self.bids_layout.get_sessions(subject=bids_participant.id) + cand_sessions[bids_participant.id] = ses if self.verbose: print('\t=> List of sessions found:\n') @@ -216,57 +216,62 @@ def load_sessions_from_bids(self): return cand_sessions - def load_modalities_from_bids(self): + def load_modalities_from_bids(self) -> list[BidsSessionInfo]: """ Grep the list of modalities available for each session and candidate directly from the BIDS structure. :return: dictionary for candidate and session with list of modalities - :rtype: dict """ if self.verbose: print('Grepping the different modalities from the BIDS layout...') - cand_session_modalities_list = [] + cand_session_modalities_list: list[BidsSessionInfo] = [] for subject, visit_list in self.cand_sessions_list.items(): if visit_list: for visit in visit_list: modalities = self.bids_layout.get_datatype(subject=subject, session=visit) - cand_session_modalities_list.append({ - 'bids_sub_id': subject, - 'bids_ses_id': visit, - 'modalities' : modalities - }) + cand_session_modalities_list.append(BidsSessionInfo( + subject_label = subject, + session_label = visit, + modalities = modalities, + )) else: modalities = self.bids_layout.get_datatype(subject=subject) - cand_session_modalities_list.append({ - 'bids_sub_id': subject, - 'bids_ses_id': None, - 'modalities' : modalities - }) + cand_session_modalities_list.append(BidsSessionInfo( + subject_label = subject, + session_label = None, + modalities = modalities, + )) if self.verbose: print('\t=> Done grepping the different modalities from the BIDS layout\n') return 
cand_session_modalities_list + def iter_modality_combinations(self) -> Generator[tuple[str, str | None, str], None, None]: + """ + Iterate over the different subject / session / modality combinations present in the BIDS + dataset. + """ + + for cand_session_modalities in self.cand_session_modalities_list: + for modality in cand_session_modalities.modalities: + yield cand_session_modalities.subject_label, cand_session_modalities.session_label, modality + @staticmethod - def grep_file(files_list, match_pattern, derivative_pattern=None): + def grep_file(files_list: list[str], match_pattern: str, derivative_pattern: str | None = None) -> str | None: """ Grep a unique file based on a match pattern and returns it. - :param files_list : list of files to look into - :type files_list : list - :param match_pattern : pattern to use to find the file - :type match_pattern : str - :param derivative_pattern: derivative pattern to use if the file we look for - is a derivative file - :type derivative_pattern: str + :param files_list : list of files to look into + :param match_pattern : pattern to use to find the file + :param derivative_pattern : derivative pattern to use if the file we look for + is a derivative file :return: name of the first file that matches the pattern - :rtype: str """ for filename in files_list: diff --git a/python/lib/candidate.py b/python/lib/candidate.py index 370433815..f3bdf53b8 100644 --- a/python/lib/candidate.py +++ b/python/lib/candidate.py @@ -3,9 +3,8 @@ import random import sys -from dateutil.parser import parse - import lib.exitcode +from lib.bids.participant import BidsParticipant __license__ = "GPLv3" @@ -59,16 +58,15 @@ def __init__(self, verbose, psc_id=None, cand_id=None, sex=None, dob=None): self.center_id = None self.project_id = None - def create_candidate(self, db, participants_info): + def create_candidate(self, db, bids_participants: list[BidsParticipant]): """ Creates a candidate using BIDS information provided in the - 
participants_info's list. + bids_participants list. :param db : database handler object :type db : object - :param participants_info: list of dictionary with participants + :param bids_participants: list of BidsParticipant entries with participants information from BIDS - :type participants_info: list :return: dictionary with candidate info from the candidate's table :rtype: dict @@ -81,25 +79,26 @@ def create_candidate(self, db, participants_info): if not self.cand_id: self.cand_id = self.generate_cand_id(db) - for row in participants_info: - if not row['participant_id'] == self.psc_id: + for bids_participant in bids_participants: + if bids_participant.id != self.psc_id: continue - self.grep_bids_dob(row) - if 'sex' in row: - self.map_sex(row['sex']) - if 'age' in row: - self.age = row['age'] + + self.dob = bids_participant.birth_date + if bids_participant.sex is not None: + self.map_sex(bids_participant.sex) + if bids_participant.age is not None: + self.age = bids_participant.age # three steps to find site: # 1. try matching full name from 'site' column in participants.tsv in db # 2. try extracting alias from pscid # 3. 
try finding previous site in candidate table - if 'site' in row and row['site'].lower() not in ("null", ""): + if bids_participant.site is not None and bids_participant.site.lower() not in ('', 'null'): # search site id in psc table by its full name site_info = db.pselect( "SELECT CenterID FROM psc WHERE Name = %s", - [row['site'], ] + [bids_participant.site, ] ) if len(site_info) > 0: self.center_id = site_info[0]['CenterID'] @@ -108,7 +107,7 @@ def create_candidate(self, db, participants_info): # search site id in psc table by its alias extracted from pscid db_sites = db.pselect("SELECT CenterID, Alias FROM psc") for site in db_sites: - if site['Alias'] in row['participant_id']: + if site['Alias'] in bids_participant.id: self.center_id = site['CenterID'] if self.center_id is None: @@ -124,11 +123,11 @@ def create_candidate(self, db, participants_info): # 1. find full name in 'project' column in participants.tsv # 2. find previous in candidate table - if 'project' in row and row['project'].lower() not in ("null", ""): + if bids_participant.project is not None and bids_participant.project.lower() not in ('', 'null'): # search project id in Project table by its full name project_info = db.pselect( "SELECT ProjectID FROM Project WHERE Name = %s", - [row['project'], ] + [bids_participant.project, ] ) if len(project_info) > 0: self.project_id = project_info[0]['ProjectID'] @@ -220,22 +219,6 @@ def map_sex(self, sex): if sex.lower() in ('f', 'female'): self.sex = 'Female' - def grep_bids_dob(self, subject_info): - """ - Greps the date of birth from the BIDS structure and add it to self.dob which - will be inserted into the DoB field of the candidate table - - :param subject_info: dictionary with all information present in the BIDS - participants.tsv file for a given candidate - :type subject_info: dict - """ - - dob_names = ['date_of_birth', 'birth_date', 'dob'] - for name in dob_names: - if name in subject_info: - dob = parse(subject_info[name]) - self.dob = 
dob.strftime('%Y-%m-%d') - @staticmethod def generate_cand_id(db): """ diff --git a/python/lib/config.py b/python/lib/config.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/lib/db/queries/candidate.py b/python/lib/db/queries/candidate.py index 27a8bee5c..6f9a77ea4 100644 --- a/python/lib/db/queries/candidate.py +++ b/python/lib/db/queries/candidate.py @@ -6,9 +6,17 @@ def try_get_candidate_with_cand_id(db: Database, cand_id: int): """ - Get a candidate from the database using its CandID, or return `None` if no candidate is - found. + Get a candidate from the database using its CandID, or return `None` if no candidate is found. """ query = select(DbCandidate).where(DbCandidate.cand_id == cand_id) return db.execute(query).scalar_one_or_none() + + +def try_get_candidate_with_psc_id(db: Database, psc_id: str): + """ + Get a candidate from the database using its PSCID, or return `None` if no candidate is found. + """ + + query = select(DbCandidate).where(DbCandidate.psc_id == psc_id) + return db.execute(query).scalar_one_or_none() diff --git a/python/lib/eeg.py b/python/lib/eeg.py index 0bc2b57fb..d1f4235fa 100755 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -4,10 +4,13 @@ import json import os import sys +from typing import Any, Literal import lib.exitcode import lib.utilities as utilities +from lib.bidsreader import BidsReader from lib.candidate import Candidate +from lib.database import Database from lib.database_lib.config import Config from lib.database_lib.physiological_event_archive import PhysiologicalEventArchive from lib.database_lib.physiological_event_file import PhysiologicalEventFile @@ -76,37 +79,28 @@ class Eeg: db.disconnect() """ - def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, loris_bids_eeg_rel_dir, - loris_bids_root_dir, dataset_tag_dict, dataset_type): + def __init__( + self, bids_reader: BidsReader, bids_sub_id: str, bids_ses_id: str | None, 
bids_modality: str, db: Database, + verbose: bool, data_dir: str, default_visit_label: str, loris_bids_eeg_rel_dir: str, + loris_bids_root_dir: str | None, dataset_tag_dict: dict[Any, Any], + dataset_type: Literal['raw', 'derivative'] | None, + ): """ Constructor method for the Eeg class. - :param bids_reader : dictionary with BIDS reader information - :type bids_reader : dict + :param bids_reader : The BIDS reader object :param bids_sub_id : BIDS subject ID (that will be used as PSCID) - :type bids_sub_id : str :param bids_ses_id : BIDS session ID (that will be used for the visit label) - :type bids_ses_id : str :param bids_modality: BIDS modality (a.k.a. EEG) - :tyoe bids_modality: str :param db : Database class object - :type db : object :param verbose : whether to be verbose - :type verbose : bool :param data_dir : LORIS data directory path (usually /data/PROJECT/data) - :type data_dir : str :param default_visit_label : default visit label to be used if no BIDS session are present in the BIDS structure - :type default_visit_label : str :param loris_bids_eeg_rel_dir: LORIS BIDS EEG relative dir path to data_dir - :type loris_bids_eeg_rel_dir: str :param loris_bids_root_dir : LORIS BIDS root directory path - :type loris_bids_root_dir : str :param dataset_tag_dict : Dict of dataset-inherited HED tags - :type dataset_tag_dict : dict :param dataset_type : raw | derivative. 
Type of the dataset - :type dataset_type : string """ # config @@ -146,13 +140,13 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, self.hed_union = self.db.pselect(query=hed_query, args=()) self.cohort_id = None - for row in bids_reader.participants_info: - if not row['participant_id'] == self.psc_id: + for bids_participant in bids_reader.bids_participants: + if bids_participant.id != self.psc_id: continue - if 'cohort' in row: + if bids_participant.cohort is not None: cohort_info = db.pselect( "SELECT CohortID FROM cohort WHERE title = %s", - [row['cohort'], ] + [bids_participant.cohort, ] ) if len(cohort_info) > 0: self.cohort_id = cohort_info[0]['CohortID'] diff --git a/python/lib/mri.py b/python/lib/mri.py index 1d5cb380f..8b3e2956c 100644 --- a/python/lib/mri.py +++ b/python/lib/mri.py @@ -6,9 +6,13 @@ import re import sys +from bids.layout import BIDSFile + import lib.exitcode import lib.utilities as utilities +from lib.bidsreader import BidsReader from lib.candidate import Candidate +from lib.database import Database from lib.imaging import Imaging from lib.scanstsv import ScansTSV from lib.session import Session @@ -70,10 +74,11 @@ class Mri: db.disconnect() """ - def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, - loris_bids_mri_rel_dir, loris_bids_root_dir): - + def __init__( + self, bids_reader: BidsReader, bids_sub_id: str, bids_ses_id: str | None, bids_modality: str, db: Database, + verbose: bool, data_dir: str, default_visit_label: str, loris_bids_mri_rel_dir: str, + loris_bids_root_dir : str | None, + ): # enumerate the different suffixes supported by BIDS per modality type self.possible_suffix_per_modality = { 'anat' : [ @@ -118,13 +123,13 @@ def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, self.center_id = self.loris_cand_info['RegistrationCenterID'] self.project_id = self.loris_cand_info['RegistrationProjectID'] 
self.cohort_id = None - for row in bids_reader.participants_info: - if not row['participant_id'] == self.psc_id: + for bids_participant in bids_reader.bids_participants: + if bids_participant.id != self.psc_id: continue - if 'cohort' in row: + if bids_participant.cohort is not None: cohort_info = db.pselect( "SELECT CohortID FROM cohort WHERE title = %s", - [row['cohort'], ] + [bids_participant.cohort, ] ) if len(cohort_info) > 0: self.cohort_id = cohort_info[0]['CohortID'] @@ -190,12 +195,11 @@ def get_loris_session_id(self): return loris_vl_info['ID'] - def grep_nifti_files(self): + def grep_nifti_files(self) -> list[BIDSFile]: """ Returns the list of NIfTI files found for the modality. :return: list of NIfTI files found for the modality - :rtype: list """ # grep all the possible suffixes for the modality @@ -209,18 +213,15 @@ def grep_nifti_files(self): # return the list of found NIfTI files return nii_files_list - def grep_bids_files(self, bids_type, extension): + def grep_bids_files(self, bids_type: str, extension: str) -> list[BIDSFile]: """ Greps the BIDS files and their layout information from the BIDSLayout and return that list. :param bids_type: the BIDS type to use to grep files (T1w, T2w, bold, dwi...) - :type bids_type: str :param extension: extension of the file to look for (nii.gz, json...) - :type extension: str :return: list of files from the BIDS layout - :rtype: list """ if self.bids_ses_id: @@ -239,25 +240,23 @@ def grep_bids_files(self, bids_type, extension): suffix = bids_type ) - def register_raw_file(self, nifti_file): + def register_raw_file(self, nifti_file: BIDSFile): """ Registers raw MRI files and related files into the files and parameter_file tables. 
:param nifti_file: NIfTI file object - :type nifti_file: pybids NIfTI file object """ # insert the NIfTI file self.fetch_and_insert_nifti_file(nifti_file) - def fetch_and_insert_nifti_file(self, nifti_file, derivatives=None): + def fetch_and_insert_nifti_file(self, nifti_file: BIDSFile, derivatives=None): """ Gather NIfTI file information to insert into the files and parameter_file tables. Once all the information has been gathered, it will call imaging.insert_imaging_file that will perform the insertion into the files and parameter_file tables. :param nifti_file : NIfTI file object - :type nifti_file : pybids NIfTI file object :param derivatives: whether the file to be registered is a derivative file :type derivatives: bool diff --git a/python/lib/util.py b/python/lib/util.py new file mode 100644 index 000000000..0c176f66a --- /dev/null +++ b/python/lib/util.py @@ -0,0 +1,44 @@ +from collections.abc import Callable, Iterable, Iterator +from typing import TypeVar + +T = TypeVar('T') + + +def find(predicate: Callable[[T], bool], iterable: Iterable[T]) -> T | None: + """ + Find the first element in an iterable that satisfies a predicate, or return `None` if no match + is found. + """ + + for item in iterable: + if predicate(item): + return item + + return None + + +T = TypeVar('T') # type: ignore +U = TypeVar('U') + + +def filter_map(function: Callable[[T], U | None], iterable: Iterable[T]) -> Iterator[U]: + """ + Apply a function to each element of an iterator and yields the results that are not `None`. + """ + + for item in iterable: + result = function(item) + if result is not None: + yield result + + +def try_parse_int(value: str) -> int | None: + """ + Parse a string into an integer (base 10), or return `None` if the string does not correspond + to an integer. 
+ """ + + try: + return int(value) + except ValueError: + return None diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py index f43a46fe2..9e1000196 100755 --- a/python/scripts/bids_import.py +++ b/python/scripts/bids_import.py @@ -7,6 +7,10 @@ import os import re import sys +from typing import Any, Literal + +from bids import BIDSLayout +from bids.layout import BIDSFile import lib.exitcode import lib.physiological @@ -25,6 +29,11 @@ sys.path.append('/home/user/python') +bids_eeg_modalities = ['eeg', 'ieeg'] + +bids_mri_modalities = ['anat', 'dwi', 'fmap', 'func'] + + # to limit the traceback when raising exceptions. # sys.tracebacklimit = 0 @@ -101,7 +110,7 @@ def main(): print(usage) sys.exit(lib.exitcode.MISSING_ARG) - if type and type not in ('raw', 'derivative'): + if type not in (None, 'raw', 'derivative'): print("--type must be one of 'raw', 'derivative'") print(usage) sys.exit(lib.exitcode.MISSING_ARG) @@ -130,21 +139,17 @@ def main(): ) -def input_error_checking(profile, bids_dir, usage): +def input_error_checking(profile: str, bids_dir: str, usage: str) -> Any: """ Checks whether the required inputs are set and that paths are valid. If the path to the config_file file valid, then it will import the file as a module so the database connection information can be used to connect. 
:param profile : path to the profile file with MySQL credentials - :type profile : str :param bids_dir: path to the BIDS directory to parse and insert into LORIS - :type bids_dir: str :param usage : script usage to be displayed when encountering an error - :type usage : st :return: config_file module with database credentials (config_file.mysql) - :rtype: module """ if not profile: @@ -182,33 +187,22 @@ def input_error_checking(profile, bids_dir, usage): def read_and_insert_bids( - bids_dir, data_dir, verbose, createcand, createvisit, - idsvalidation, nobidsvalidation, type, nocopy, db + bids_dir: str, data_dir: str, verbose: bool, createcand: bool, createvisit: bool, + idsvalidation: bool, nobidsvalidation: bool, type: Literal['raw', 'derivative'] | None, nocopy: bool, db: Database, ): """ Read the provided BIDS structure and import it into the database. :param bids_dir : path to the BIDS directory - :type bids_dir : str :param data_dir : data_dir config value - :type data_dir : string :param verbose : flag for more printing if set - :type verbose : bool :param createcand : allow database candidate creation if it did not exist already - :type createcand : bool :param createvisit : allow database visit creation if it did not exist already - :type createvisit : bool :param idsvalidation : allow pscid/candid validation in the BIDS directory name - :type idsvalidation : bool :param nobidsvalidation : disable bids dataset validation - :type nobidsvalidation : bool - :param type : raw | derivative. 
Type of the dataset - :type type : string + :param type : Type of the dataset :param nocopy : disable bids dataset copy in assembly_bids - :type nocopy : bool :param db : db object - :type db : object - """ # grep config settings from the Config module @@ -224,7 +218,7 @@ def read_and_insert_bids( bids_reader = BidsReader(bids_dir, verbose, False) else: bids_reader = BidsReader(bids_dir, verbose) - if not bids_reader.participants_info \ + if not bids_reader.bids_participants \ or not bids_reader.cand_sessions_list \ or not bids_reader.cand_session_modalities_list: message = '\n\tERROR: could not properly parse the following' \ @@ -243,10 +237,10 @@ def read_and_insert_bids( single_project_id = None # loop through subjects - for bids_subject_info in bids_reader.participants_info: + for bids_participant in bids_reader.bids_participants: # greps BIDS information for the candidate - bids_id = bids_subject_info['participant_id'] + bids_id = bids_participant.id bids_sessions = bids_reader.cand_sessions_list[bids_id] # greps BIDS candidate's info from LORIS (creates the candidate if it @@ -266,9 +260,9 @@ def read_and_insert_bids( cohort_id = None # TODO: change subproject -> cohort in participants.tsv? - if 'subproject' in bids_subject_info: + if bids_participant.subproject is not None: # TODO: change subproject -> cohort in participants.tsv? - cohort = bids_subject_info['subproject'] + cohort = bids_participant.subproject cohort_info = db.pselect( "SELECT CohortID FROM cohort WHERE title = %s", [cohort, ] @@ -332,61 +326,81 @@ def read_and_insert_bids( hed_union=hed_union ) + # TODO: What if `loris_bids_root_dir` is `None` (nocopy) ? 
+ loris_bids = BIDSLayout(loris_bids_root_dir) + # read list of modalities per session / candidate and register data - for row in bids_reader.cand_session_modalities_list: - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_visit_rel_dir = 'sub-' + row['bids_sub_id'] + '/' + 'ses-' + visit_label - - for modality in row['modalities']: - loris_bids_modality_rel_dir = loris_bids_visit_rel_dir + '/' + modality + '/' - if not nocopy: - lib.utilities.create_dir(loris_bids_root_dir + loris_bids_modality_rel_dir, verbose) - - if modality == 'eeg' or modality == 'ieeg': - Eeg( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_modality_rel_dir, - loris_bids_root_dir = loris_bids_root_dir, - dataset_tag_dict = dataset_tag_dict, - dataset_type = type - ) - - elif modality in ['anat', 'dwi', 'fmap', 'func']: - Mri( - bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_mri_rel_dir = loris_bids_modality_rel_dir, - loris_bids_root_dir = loris_bids_root_dir - ) + for subject_label, session_label, modality in bids_reader.iter_modality_combinations(): + if session_label is not None: + visit_label = session_label + else: + visit_label = default_bids_vl + + loris_bids_modality_files: list[BIDSFile] = loris_bids.get( # type: ignore + subject=subject_label, + session=visit_label, + suffix=modality, + ) + + if loris_bids_modality_files != []: + print( + 'Files already inserted in LORIS, skipping:\n' + f'- Subject: {subject_label}\n' + f'- Session: {session_label}\n' + f'- Modality: {modality}' + ) + + continue + + 
loris_bids_modality_rel_dir = os.path.join( + f'sub-{subject_label}', + f'ses-{visit_label}', + modality, + ) + + if not nocopy: + lib.utilities.create_dir(loris_bids_root_dir + loris_bids_modality_rel_dir, verbose) + + if modality in bids_eeg_modalities: + Eeg( + bids_reader = bids_reader, + bids_sub_id = subject_label, + bids_ses_id = session_label, + bids_modality = modality, + db = db, + verbose = verbose, + data_dir = data_dir, + default_visit_label = default_bids_vl, + loris_bids_eeg_rel_dir = loris_bids_modality_rel_dir, + loris_bids_root_dir = loris_bids_root_dir, + dataset_tag_dict = dataset_tag_dict, + dataset_type = type + ) + elif modality in bids_mri_modalities: + Mri( + bids_reader = bids_reader, + bids_sub_id = subject_label, + bids_ses_id = session_label, + bids_modality = modality, + db = db, + verbose = verbose, + data_dir = data_dir, + default_visit_label = default_bids_vl, + loris_bids_mri_rel_dir = loris_bids_modality_rel_dir, + loris_bids_root_dir = loris_bids_root_dir + ) # disconnect from the database db.disconnect() -def validateids(bids_dir, db, verbose): +def validateids(bids_dir: str, db: Database, verbose: bool): """ Validate that pscid and candid matches :param bids_dir : path to the BIDS directory - :type bids_dir : str :param db : database handler object - :type db : object - :param verbose : flag for more printing if set - :type verbose : bool + :param verbose : flag for more printing if set """ bids_folder = bids_dir.rstrip('/').split('/')[-1] @@ -405,21 +419,17 @@ def validateids(bids_dir, db, verbose): sys.exit(lib.exitcode.CANDIDATE_MISMATCH) -def create_loris_bids_directory(bids_reader, data_dir, verbose): +def create_loris_bids_directory(bids_reader: BidsReader, data_dir: str, verbose: bool) -> str: """ Creates the LORIS BIDS import root directory (with name and BIDS version) and copy over the dataset_description.json, README and participants.tsv files. 
- :param bids_reader: BIDS information handler object - :type bids_reader: object - :param data_dir : path of the LORIS data directory - :type data_dir : str - :param verbose : if true, prints out information while executing - :type verbose : bool + :param bids_reader : BIDS information handler object + :param data_dir : path of the LORIS data directory + :param verbose : if true, prints out information while executing :return: path to the LORIS BIDS import root directory - :rtype: str """ # making sure that there is a final / in bids_dir @@ -470,26 +480,21 @@ def create_loris_bids_directory(bids_reader, data_dir, verbose): return loris_bids_dirname -def grep_or_create_candidate_db_info(bids_reader, bids_id, db, createcand, verbose): +def grep_or_create_candidate_db_info( + bids_reader: BidsReader, bids_id: str, db: Database, createcand: bool, verbose: bool +) -> dict[str, Any]: """ Greps (or creates if candidate does not exist and createcand is true) the BIDS candidate in the LORIS candidate's table and return a list of candidates with their related fields from the database. :param bids_reader : BIDS information handler object - :type bids_reader : object :param bids_id : bids_id to be used (CandID or PSCID) - :type bids_id : str :param db : database handler object - :type db : object :param createcand : if true, creates the candidate in LORIS - :type createcand : bool :param verbose : if true, prints out information while executing - :type verbose : bool - :return: list of candidate's dictionaries. 
One entry in the list holds - a dictionary with field's values from the candidate table - :rtype: list + :return: The dictionary of the candidate database record """ candidate = Candidate(verbose=verbose, cand_id=bids_id) @@ -501,7 +506,7 @@ def grep_or_create_candidate_db_info(bids_reader, bids_id, db, createcand, verbo if not loris_cand_info and createcand: loris_cand_info = candidate.create_candidate( - db, bids_reader.participants_info + db, bids_reader.bids_participants ) if not loris_cand_info: print("Creating candidate failed. Cannot importing the files.\n") @@ -515,39 +520,27 @@ def grep_or_create_candidate_db_info(bids_reader, bids_id, db, createcand, verbo def grep_or_create_session_db_info( - bids_id, cand_id, visit_label, - db, createvisit, verbose, loris_bids_dir, - center_id, project_id, cohort_id, nocopy): + bids_id: str, cand_id: int, visit_label: str, db: Database, createvisit: bool, verbose: bool, + loris_bids_dir: str, center_id: int, project_id: int, cohort_id: int, nocopy: bool +) -> dict[str, Any]: """ Greps (or creates if session does not exist and createvisit is true) the BIDS session in the LORIS session's table and return a list of sessions with their related fields from the database. 
- :parma bids_id : BIDS ID of the session - :type bids_id : str - :param cand_id : CandID to use to create the session - :type cand_id : int - :param visit_label : Visit label to use to create the session - :type visit_label : str - :param db : database handler object - :type db : object - :param createvisit : if true, creates the session in LORIS - :type createvisit : bool - :param verbose : if true, prints out information while executing - :type verbose : bool - :param loris_bids_dir: LORIS BIDS import root directory to copy data - :type loris_bids_dir: str - :param center_id : CenterID to use to create the session - :type center_id : int - :param project_id : ProjectID to use to create the session - :type project_id : int - :param cohort_id : CohortID to use to create the session - :type cohort_id : int - :param nocopy : if true, skip the assembly_bids dataset copy - :type nocopy : bool + :param bids_id : BIDS ID of the session + :param cand_id : CandID to use to create the session + :param visit_label : Visit label to use to create the session + :param db : database handler object + :param createvisit : if true, creates the session in LORIS + :param verbose : if true, prints out information while executing + :param loris_bids_dir : LORIS BIDS import root directory to copy data + :param center_id : CenterID to use to create the session + :param project_id : ProjectID to use to create the session + :param cohort_id : CohortID to use to create the session + :param nocopy : if true, skip the assembly_bids dataset copy :return: session information grepped from LORIS for cand_id and visit_label - :rtype: dict """ session = Session(db, verbose, cand_id, visit_label, center_id, project_id, cohort_id) @@ -567,44 +560,30 @@ def grep_or_create_session_db_info( return loris_vl_info -def grep_candidate_sessions_info(bids_ses, bids_id, cand_id, loris_bids_dir, - createvisit, verbose, db, default_vl, - center_id, project_id, cohort_id, nocopy): +def 
grep_candidate_sessions_info( + bids_ses: list[str], bids_id: str, cand_id: int, loris_bids_dir: str, createvisit: bool, verbose: bool, + db: Database, default_vl: str, center_id: int, project_id: int, cohort_id: int, nocopy: bool, +) -> list[dict[str, Any]]: """ Greps all session info dictionaries for a given candidate and aggregates them into a list, with one entry per session. If the session does not exist in LORIS and that createvisit is true, it will create the session first. - :param bids_ses : list of BIDS sessions to grep info or insert - :type bids_ses : list - :param bids_id : BIDS ID of the candidate - :type bids_id : str - :param cand_id : candidate's CandID - :type cand_id : int - :param loris_bids_dir: LORIS BIDS import root directory to copy data - :type loris_bids_dir: str - :param createvisit : if true, creates the visits in LORIS - :type createvisit : bool - :param verbose : if true, prints out information while executing - :type verbose : bool - :param db : database handler object - :type db : object - :param default_vl : default visit label from the Config module - :type default_vl : str - :param center_id : center ID associated to the candidate and visit - :type center_id : int - :param project_id : project ID associated to the candidate and visit - :type project_id : int - :param cohort_id : cohort ID associated to the candidate and visit - :type cohort_id : int - :param nocopy : if true, skip the assembly_bids dataset copy - :type nocopy : bool - - + :param bids_ses : list of BIDS sessions to grep info or insert + :param bids_id : BIDS ID of the candidate + :param cand_id : candidate's CandID + :param loris_bids_dir : LORIS BIDS import root directory to copy data + :param createvisit : if true, creates the visits in LORIS + :param verbose : if true, prints out information while executing + :param db : database handler object + :param default_vl : default visit label from the Config module + :param center_id : center ID associated to the 
candidate and visit + :param project_id : project ID associated to the candidate and visit + :param cohort_id : cohort ID associated to the candidate and visit + :param nocopy : if true, skip the assembly_bids dataset copy :return: list of all session's dictionaries for a given candidate - :rtype: list """ loris_sessions_info = []