Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incremental BIDS import #1211

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ select = ["E", "F", "I", "N", "UP", "W"]
[tool.pyright]
include = [
"python/tests",
"python/lib/bids",
"python/lib/db",
"python/lib/exception",
"python/lib/config_file.py",
Expand All @@ -23,6 +24,7 @@ include = [
"python/lib/get_subject_session.py",
"python/lib/logging.py",
"python/lib/make_env.py",
"python/lib/util.py",
"python/lib/validate_subject_info.py",
]
typeCheckingMode = "strict"
Expand Down
57 changes: 57 additions & 0 deletions python/lib/bids/candidate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import sys
from typing import Optional

from bids.layout import BIDSLayout
from sqlalchemy.orm import Session as Database

from lib.db.models.candidate import DbCandidate
from lib.db.queries.candidate import try_get_candidate_with_cand_id, try_get_candidate_with_psc_id
from lib.util import filter_map, try_parse_int


def get_bids_candidates(db: Database, bids_layout: BIDSLayout) -> list[DbCandidate]:
    """
    Collect the database candidates that correspond to the subjects of a BIDS dataset.

    Each subject label reported by the BIDS layout is looked up with `get_bids_candidate`, and
    the per-label results are combined with `filter_map`.
    """

    def find_candidate(subject_label: str) -> Optional[DbCandidate]:
        # Bind the database handle so that the lookup only takes the subject label.
        return get_bids_candidate(db, subject_label)

    # Subject labels as reported by the BIDS layout.
    subject_labels: list[str] = bids_layout.get_subjects()  # type: ignore

    # NOTE(review): `filter_map` presumably drops the labels whose lookup returned `None` —
    # confirm against `lib.util`.
    return list(filter_map(find_candidate, subject_labels))


def get_bids_candidate(db: Database, bids_subject_label: str) -> Optional[DbCandidate]:
    """
    Look up the database candidate that matches a BIDS subject label.

    The label is first tried as a CandID (only when `try_parse_int` yields a value), then as a
    PSCID. If neither lookup returns a candidate, a warning is printed on the standard error
    stream and `None` is returned.
    """

    # A label that parses as an integer might be a CandID, so that lookup goes first.
    parsed_cand_id = try_parse_int(bids_subject_label)
    if parsed_cand_id is not None:
        found = try_get_candidate_with_cand_id(db, parsed_cand_id)
    else:
        found = None

    # Otherwise (or if the CandID lookup found nothing), treat the label as a PSCID.
    if found is None:
        found = try_get_candidate_with_psc_id(db, bids_subject_label)

    # All the candidates of the BIDS dataset are expected to be in the database at this stage,
    # so a label without any match is reported rather than silently ignored.
    if found is None:
        warning = (
            f"WARNING: No candidate found for BIDS subject label '{bids_subject_label}',"
            " candidate omitted from the participants file"
        )
        print(warning, file=sys.stderr)

    return found
93 changes: 93 additions & 0 deletions python/lib/bids/participant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from dataclasses import dataclass

import dateutil.parser
from bids import BIDSLayout

import lib.utilities as utilities
from lib.db.models.candidate import DbCandidate


@dataclass
class BidsParticipant:
    """
    Information about a BIDS participant represented in an entry in the `participants.tsv` file of
    a BIDS dataset.

    Only `id` is required; every other field defaults to `None`. The field order is part of the
    constructor interface (positional arguments), so new fields should be appended at the end.
    """

    # Participant identifier: the `participant_id` column without its `sub-` prefix when read
    # from a `participants.tsv` row, or the candidate PSCID when built from a database candidate.
    id: str
    # Date of birth formatted as `YYYY-MM-DD`, or `None` when it is not known.
    birth_date: str | None = None
    # The remaining fields hold the raw string of the TSV column of the same name when the
    # participant is read from a `participants.tsv` row.
    sex: str | None = None
    age: str | None = None
    site: str | None = None
    cohort: str | None = None
    project: str | None = None
    # FIXME: Both "cohort" and "subproject" are used in scripts, this may be a bug.
    subproject: str | None = None


def read_bids_participants_file(bids_layout: BIDSLayout) -> list[BidsParticipant] | None:
    """
    Locate the `participants.tsv` file of a BIDS dataset and parse its entries.

    Returns one `BidsParticipant` per row of the file, or `None` if the BIDS layout does not
    report any file whose path contains `participants.tsv`.
    """

    # Ask the layout for the paths of the files that have the `participants` suffix.
    participants_suffix_paths = bids_layout.get(suffix='participants', return_type='filename')  # type: ignore

    # Keep the first path that refers to the TSV file (the suffix may match other files too).
    participants_tsv_path = next(
        (path for path in participants_suffix_paths if 'participants.tsv' in path),  # type: ignore
        None,
    )

    # Without a `participants.tsv` file there is nothing to parse.
    if participants_tsv_path is None:
        return None

    # Convert every parsed TSV row into a BIDS participant entry.
    tsv_rows = utilities.read_tsv_file(participants_tsv_path)  # type: ignore
    return [read_bids_participant_row(tsv_row) for tsv_row in tsv_rows]  # type: ignore


def read_bids_participant_row(row: dict[str, str]) -> BidsParticipant:
    """
    Get a BIDS participant entry from a parsed TSV line of a `participants.tsv` file.

    The row must contain a `participant_id` column (a `KeyError` is raised otherwise). The date of
    birth is read from the first of the `date_of_birth`, `birth_date` or `dob` columns that holds
    a usable value, and is normalized to `YYYY-MM-DD`. Cells that are empty or contain the BIDS
    missing-value marker `n/a` are treated as missing instead of being handed to the date parser.
    A date value that is present but cannot be parsed still raises the `dateutil` parsing error.
    The other columns are copied as-is, and are `None` when the column is absent.
    """

    # Remove only a leading `sub-`. `str.replace` would also delete any later occurrence of
    # `sub-` inside the label and silently corrupt the participant ID.
    participant_id = row['participant_id'].removeprefix('sub-')

    # Get the participant date of birth from one of the possible date of birth fields.
    birth_date = None
    for birth_date_name in ('date_of_birth', 'birth_date', 'dob'):
        # `or ''` also covers readers that fill the cells of short rows with `None`.
        raw_birth_date = (row.get(birth_date_name) or '').strip()

        # Skip absent, empty, or `n/a` values, which would make the date parser raise.
        if raw_birth_date == '' or raw_birth_date.lower() == 'n/a':
            continue

        birth_date = dateutil.parser.parse(raw_birth_date).strftime('%Y-%m-%d')
        break

    # Create the BIDS participant object.
    return BidsParticipant(
        id=participant_id,
        birth_date=birth_date,
        sex=row.get('sex'),
        age=row.get('age'),
        site=row.get('site'),
        cohort=row.get('cohort'),
        project=row.get('project'),
        subproject=row.get('subproject'),
    )


def get_bids_participant_from_candidate(candidate: DbCandidate) -> BidsParticipant:
    """
    Build the BIDS participant entry that describes a database candidate.

    The participant ID is the candidate PSCID. The site and project are the names of the
    candidate registration site and registration project. The date of birth is formatted as
    `YYYY-MM-DD`, or left as `None` if the candidate has no date of birth.
    """

    # Format the date of birth only if the candidate has one.
    date_of_birth = candidate.date_of_birth
    if date_of_birth is None:
        formatted_birth_date = None
    else:
        formatted_birth_date = date_of_birth.strftime('%Y-%m-%d')

    # Fields that have no database counterpart here (age, cohort, subproject) keep their
    # `None` default.
    participant = BidsParticipant(
        id=candidate.psc_id,
        birth_date=formatted_birth_date,
        sex=candidate.sex,
        site=candidate.registration_site.name,
        project=candidate.registration_project.name,
    )
    return participant
Loading
Loading