From 2f607fd390ee1ec95e6dde1357c7b8ba7e434ff6 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Fri, 1 Nov 2024 17:05:46 -0400 Subject: [PATCH 1/3] update proc status column name mappings --- bagel/derivatives_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bagel/derivatives_utils.py b/bagel/derivatives_utils.py index 0e64eca..52d2293 100644 --- a/bagel/derivatives_utils.py +++ b/bagel/derivatives_utils.py @@ -9,8 +9,8 @@ # we only only look at `bids_session` right now. We should revisit this after the schema is finalized, # to see if any other logic is needed to avoid issues with session ID discrepancies across columns. PROC_STATUS_COLS = { - "participant": "bids_participant", - "session": "bids_session", + "participant": "bids_participant_id", + "session": "bids_session_id", "pipeline_name": "pipeline_name", "pipeline_version": "pipeline_version", "status": "status", From cb4ccbe11b85eb7b3366d9d011e1019097f07e86 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sat, 2 Nov 2024 20:50:50 -0400 Subject: [PATCH 2/3] add _id to identifier col names in proc status files --- bagel/tests/data/README.md | 2 +- bagel/tests/data/proc_status_missing_sessions.tsv | 2 +- bagel/tests/data/proc_status_no_bids_sessions.tsv | 2 +- bagel/tests/data/proc_status_synthetic.csv | 2 +- bagel/tests/data/proc_status_synthetic.tsv | 2 +- bagel/tests/data/proc_status_synthetic_incomplete.tsv | 2 +- bagel/tests/data/proc_status_unique_sessions.tsv | 2 +- bagel/tests/data/proc_status_unique_subs.tsv | 2 +- bagel/tests/test_cli_derivatives.py | 2 +- bagel/tests/test_utility.py | 4 ++-- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/bagel/tests/data/README.md b/bagel/tests/data/README.md index 073c90d..c5b75e9 100644 --- a/bagel/tests/data/README.md +++ b/bagel/tests/data/README.md @@ -39,7 +39,7 @@ Example file `proc_status`... 
| Description | Expected result _synthetic.tsv | Captures a subset of subject-sessions represented in the BIDS examples synthetic dataset | Pass _synthetic.csv | Same as proc_status_synthetic.csv, but is a CSV file | Fail _unique_subs.tsv | Includes subjects not found in the phenotypic dataset | Fail -_incomplete.tsv | Has a missing value in the `bids_participant` column | Fail +_incomplete.tsv | Has a missing value in the `bids_participant_id` column | Fail _unique_sessions.csv | Includes a unique subject-session (`sub-01`, `ses-03`) not found in the synthetic dataset | Pass _missing_sessions.tsv | One subject (`sub-02`) is missing all session labels | Pass _no_bids_sessions.tsv | Has session labels in all rows for `session_id`, but no values in `bids_session` column | Pass diff --git a/bagel/tests/data/proc_status_missing_sessions.tsv b/bagel/tests/data/proc_status_missing_sessions.tsv index 47bb84f..2ec985a 100644 --- a/bagel/tests/data/proc_status_missing_sessions.tsv +++ b/bagel/tests/data/proc_status_missing_sessions.tsv @@ -1,4 +1,4 @@ -participant_id bids_participant session_id bids_session pipeline_name pipeline_version pipeline_step status +participant_id bids_participant_id session_id bids_session_id pipeline_name pipeline_version pipeline_step status 01 sub-01 01 ses-01 fmriprep 20.2.7 step1 SUCCESS 01 sub-01 01 ses-01 fmriprep 20.2.7 step2 SUCCESS 01 sub-01 01 ses-01 fmriprep 23.1.3 default SUCCESS diff --git a/bagel/tests/data/proc_status_no_bids_sessions.tsv b/bagel/tests/data/proc_status_no_bids_sessions.tsv index b5ac516..018ee94 100644 --- a/bagel/tests/data/proc_status_no_bids_sessions.tsv +++ b/bagel/tests/data/proc_status_no_bids_sessions.tsv @@ -1,4 +1,4 @@ -participant_id bids_participant session_id bids_session pipeline_name pipeline_version pipeline_step status +participant_id bids_participant_id session_id bids_session_id pipeline_name pipeline_version pipeline_step status 01 sub-01 01 fmriprep 20.2.7 step1 SUCCESS 01 sub-01 01 fmriprep 
20.2.7 step2 SUCCESS 01 sub-01 01 fmriprep 23.1.3 default SUCCESS diff --git a/bagel/tests/data/proc_status_synthetic.csv b/bagel/tests/data/proc_status_synthetic.csv index da47d84..69c75d8 100644 --- a/bagel/tests/data/proc_status_synthetic.csv +++ b/bagel/tests/data/proc_status_synthetic.csv @@ -1,4 +1,4 @@ -participant_id,bids_participant,session_id,bids_session,pipeline_name,pipeline_version,pipeline_step,status +participant_id,bids_participant_id,session_id,bids_session_id,pipeline_name,pipeline_version,pipeline_step,status 01,sub-01,1,ses-01,fmriprep,20.2.7,step1,FAIL 01,sub-01,1,ses-01,fmriprep,20.2.7,step2,INCOMPLETE 01,sub-01,1,ses-01,fmriprep,23.1.3,default,SUCCESS diff --git a/bagel/tests/data/proc_status_synthetic.tsv b/bagel/tests/data/proc_status_synthetic.tsv index e2bc680..7c22a2d 100644 --- a/bagel/tests/data/proc_status_synthetic.tsv +++ b/bagel/tests/data/proc_status_synthetic.tsv @@ -1,4 +1,4 @@ -participant_id bids_participant session_id bids_session pipeline_name pipeline_version pipeline_step status +participant_id bids_participant_id session_id bids_session_id pipeline_name pipeline_version pipeline_step status 01 sub-01 01 ses-01 fmriprep 20.2.7 step1 FAIL 01 sub-01 01 ses-01 fmriprep 20.2.7 step2 INCOMPLETE 01 sub-01 01 ses-01 fmriprep 23.1.3 default SUCCESS diff --git a/bagel/tests/data/proc_status_synthetic_incomplete.tsv b/bagel/tests/data/proc_status_synthetic_incomplete.tsv index c0308c5..6294ae7 100644 --- a/bagel/tests/data/proc_status_synthetic_incomplete.tsv +++ b/bagel/tests/data/proc_status_synthetic_incomplete.tsv @@ -1,4 +1,4 @@ -participant_id bids_participant session_id bids_session pipeline_name pipeline_version pipeline_step status +participant_id bids_participant_id session_id bids_session_id pipeline_name pipeline_version pipeline_step status 01 sub-01 01 ses-01 fmriprep 20.2.7 step1 FAIL 01 sub-01 01 ses-01 fmriprep 20.2.7 step2 INCOMPLETE 01 sub-01 01 ses-01 fmriprep 23.1.3 default SUCCESS diff --git 
a/bagel/tests/data/proc_status_unique_sessions.tsv b/bagel/tests/data/proc_status_unique_sessions.tsv index f12db1c..4a403e5 100644 --- a/bagel/tests/data/proc_status_unique_sessions.tsv +++ b/bagel/tests/data/proc_status_unique_sessions.tsv @@ -1,4 +1,4 @@ -participant_id bids_participant session_id bids_session pipeline_name pipeline_version pipeline_step status +participant_id bids_participant_id session_id bids_session_id pipeline_name pipeline_version pipeline_step status 01 sub-01 01 ses-01 fmriprep 20.2.7 step1 FAIL 01 sub-01 01 ses-01 fmriprep 20.2.7 step2 INCOMPLETE 01 sub-01 03 ses-03 fmriprep 23.1.3 default SUCCESS diff --git a/bagel/tests/data/proc_status_unique_subs.tsv b/bagel/tests/data/proc_status_unique_subs.tsv index 4eeb356..866d1d3 100644 --- a/bagel/tests/data/proc_status_unique_subs.tsv +++ b/bagel/tests/data/proc_status_unique_subs.tsv @@ -1,4 +1,4 @@ -participant_id bids_participant session_id bids_session pipeline_name pipeline_version pipeline_step status +participant_id bids_participant_id session_id bids_session_id pipeline_name pipeline_version pipeline_step status pd1 sub-pd1 01 ses-01 fmriprep 20.2.7 step1 FAIL pd1 sub-pd1 01 ses-01 fmriprep 20.2.7 step2 INCOMPLETE pd2 sub-pd2 01 ses-01 fmriprep 23.1.3 default SUCCESS diff --git a/bagel/tests/test_cli_derivatives.py b/bagel/tests/test_cli_derivatives.py index 7917043..5349bec 100644 --- a/bagel/tests/test_cli_derivatives.py +++ b/bagel/tests/test_cli_derivatives.py @@ -161,7 +161,7 @@ def test_derivatives_invalid_inputs_fail( # TODO: Revisit this example once the updated Nipoppy proc status file schema is available # This example assumes that # 1. It is possible to have a subject with missing values in bids_session but not in session_id - # 2. Duplicate entries of pipeline name, version, and step for an apparent subject-session based on bids_participant and bids_session + # 2. 
Duplicate entries of pipeline name, version, and step for an apparent subject-session based on bids_participant_id and bids_session_id # (i.e., the two columns Neurobagel looks at) are allowed (see rows 8 and 9) ("proc_status_no_bids_sessions.tsv", {"sub-01": 3, "sub-02": 2}), ], diff --git a/bagel/tests/test_utility.py b/bagel/tests/test_utility.py index 59dbc65..d3ca92f 100644 --- a/bagel/tests/test_utility.py +++ b/bagel/tests/test_utility.py @@ -761,9 +761,9 @@ def test_create_completed_pipelines(): example_ses_proc_df = pd.DataFrame.from_records( columns=[ "participant_id", - "bids_participant", + "bids_participant_id", "session_id", - "bids_session", + "bids_session_id", "pipeline_name", "pipeline_version", "pipeline_step", From 4d576e6d86148ccdbf5d3a843c512d2beac9b3da Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Sat, 2 Nov 2024 21:23:06 -0400 Subject: [PATCH 3/3] update comments --- bagel/derivatives_utils.py | 2 +- bagel/tests/data/README.md | 2 +- bagel/tests/test_cli_derivatives.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bagel/derivatives_utils.py b/bagel/derivatives_utils.py index 52d2293..8badbd0 100644 --- a/bagel/derivatives_utils.py +++ b/bagel/derivatives_utils.py @@ -6,7 +6,7 @@ # Shorthands for expected column names in a Nipoppy processing status file # TODO: While there are multiple session ID columns in a Nipoppy processing status file, -# we only only look at `bids_session` right now. We should revisit this after the schema is finalized, +# we only look at `bids_session_id` right now. We should revisit this after the schema is finalized, # to see if any other logic is needed to avoid issues with session ID discrepancies across columns. 
PROC_STATUS_COLS = { "participant": "bids_participant_id", diff --git a/bagel/tests/data/README.md b/bagel/tests/data/README.md index c5b75e9..d47e758 100644 --- a/bagel/tests/data/README.md +++ b/bagel/tests/data/README.md @@ -42,7 +42,7 @@ _unique_subs.tsv | Includes subjects not found in the phenotypic dataset | Fail _incomplete.tsv | Has a missing value in the `bids_participant_id` column | Fail _unique_sessions.csv | Includes a unique subject-session (`sub-01`, `ses-03`) not found in the synthetic dataset | Pass _missing_sessions.tsv | One subject (`sub-02`) is missing all session labels | Pass -_no_bids_sessions.tsv | Has session labels in all rows for `session_id`, but no values in `bids_session` column | Pass +_no_bids_sessions.tsv | Has session labels in all rows for `session_id`, but no values in `bids_session_id` column | Pass ## Example expected CLI outputs diff --git a/bagel/tests/test_cli_derivatives.py b/bagel/tests/test_cli_derivatives.py index 5349bec..9cbcfc2 100644 --- a/bagel/tests/test_cli_derivatives.py +++ b/bagel/tests/test_cli_derivatives.py @@ -160,7 +160,7 @@ def test_derivatives_invalid_inputs_fail( ("proc_status_missing_sessions.tsv", {"sub-02": 2}), # TODO: Revisit this example once the updated Nipoppy proc status file schema is available # This example assumes that - # 1. It is possible to have a subject with missing values in bids_session but not in session_id + # 1. It is possible to have a subject with missing values in bids_session_id but not in session_id # 2. Duplicate entries of pipeline name, version, and step for an apparent subject-session based on bids_participant_id and bids_session_id # (i.e., the two columns Neurobagel looks at) are allowed (see rows 8 and 9) ("proc_status_no_bids_sessions.tsv", {"sub-01": 3, "sub-02": 2}),