-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathget_additional_modalities_from_fmriresults01id.py
94 lines (74 loc) · 3.72 KB
/
get_additional_modalities_from_fmriresults01id.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# ---
# jupyter:
# jupytext:
# formats: ipynb,py:percent
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.16.1
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
# name: python3
# ---
# %% [markdown] jp-MarkdownHeadingCollapsed=true
# This script processes the output of `sample_abcd_dmri_sites_equally.py` to collect the imaging-data S3 links for modalities other than dMRI (here T1 and T2), for the same subject/event pairs that were sampled.
#
# We need the tables `fmriresults01.txt`, `abcd_y_lt.csv`, and `mri_y_adm_info.csv` from [the ABCD study](https://wiki.abcdstudy.org/release-notes/start-page.html). We need the output `sample_site_table.csv` from `sample_abcd_dmri_sites_equally.py`. This script is meant for ABCD Study release 5.0 or 5.1. In this script I use two versions of `fmriresults01.txt`, since I created a separate data package that has the t1 and t2 but not the dmri.
# %%
import pandas as pd
import numpy as np
from pathlib import Path
# %%
# Edit these appropriately for your machine.
# Root folder under which the ABCD downloads live
data_root_path = Path("/data/ebrahim-data/abcd")

# fmriresults01.txt from the data package that contains the dMRI images
fmriresults01_dmri_path = data_root_path / "Package_1224700/fmriresults01.txt"
# fmriresults01.txt from the separate data package that has the T1 and T2 images
fmriresults01_t1t2_path = data_root_path / "Package_1227348/fmriresults01.txt"

# Tabular-release tables that get merged onto the fmriresults01 rows
abcd_y_lt_path = data_root_path / "abcd-5.0-tabular-data-extracted/core/abcd-general/abcd_y_lt.csv"
mri_y_adm_info_path = data_root_path / "abcd-5.0-tabular-data-extracted/core/imaging/mri_y_adm_info.csv"

# Output of sample_abcd_dmri_sites_equally.py
sample_site_table_path = Path("./sample_site_table.csv")
# %%
def read_fmriresults_file_and_merge_other_tables(
    fmriresults01_path,
    lt_path=None,
    mri_info_path=None,
):
    """Load an ``fmriresults01.txt`` table and attach site and scanner columns.

    The tab-separated file is read, then left-merged with two other tables:

    1. the ``abcd_y_lt`` table, matched on ``src_subject_id`` and
       ``interview_age``, which contributes ``site_id_l`` and ``eventname``;
    2. the ``mri_y_adm_info`` table, matched on ``src_subject_id`` and
       ``eventname``, which contributes ``mri_info_manufacturer`` and
       ``mri_info_manufacturersmn``.

    Rows that end up without an ``mri_info_manufacturer`` value are dropped
    and the index is reset to ``0..n-1``.

    Parameters
    ----------
    fmriresults01_path
        Path to the tab-separated ``fmriresults01.txt`` file.
    lt_path
        Path to the ``abcd_y_lt`` CSV. Defaults to the module-level
        ``abcd_y_lt_path``.
    mri_info_path
        Path to the ``mri_y_adm_info`` CSV. Defaults to the module-level
        ``mri_y_adm_info_path``.

    Returns
    -------
    pandas.DataFrame
        The merged table, one row per surviving ``fmriresults01`` row.
    """
    # Fall back to the notebook-level configuration when no path is passed in.
    if lt_path is None:
        lt_path = abcd_y_lt_path
    if mri_info_path is None:
        mri_info_path = mri_y_adm_info_path

    # skiprows=[1] drops the second line of the file (presumably the
    # column-description row that follows the header -- TODO confirm), so that
    # interview_age can be parsed as an integer.
    df = pd.read_csv(
        fmriresults01_path,
        delimiter='\t',
        low_memory=False,
        dtype={'interview_age': int},
        skiprows=[1],
    )

    lt = pd.read_csv(lt_path)
    # Rows without an age cannot take part in the merge; drop them, then cast
    # so the key has the same integer dtype on both sides.
    lt = lt.dropna(axis=0, subset=['interview_age'])
    lt = lt.assign(interview_age=lt['interview_age'].astype(int))

    mri_info = pd.read_csv(mri_info_path)

    df = df.merge(
        lt[['src_subject_id', 'interview_age', 'site_id_l', 'eventname']],
        on=['src_subject_id', 'interview_age'],
        how='left',
    )
    df = df.merge(
        mri_info[['src_subject_id', 'eventname', 'mri_info_manufacturer', 'mri_info_manufacturersmn']],
        on=['src_subject_id', 'eventname'],
        how='left',
    )

    # Keep only rows for which scanner manufacturer information was found.
    df = df.dropna(subset=['mri_info_manufacturer'])
    df = df.reset_index(drop=True)
    return df
# %%
# Load both data packages, each merged with the site and scanner tables
df_dmri = read_fmriresults_file_and_merge_other_tables(fmriresults01_dmri_path)
df_t1t2 = read_fmriresults_file_and_merge_other_tables(fmriresults01_t1t2_path)

# %%
# fmriresults01 ids listed in the sample site table
fmriresults01_ids = pd.read_csv(sample_site_table_path)['fmriresults01_id']

# dMRI rows whose id appears in that table
is_sampled_row = df_dmri['fmriresults01_id'].isin(fmriresults01_ids)
df_rows_from_original_dmri_sample = df_dmri[is_sampled_row]

# (subject, event) pairs of those rows; used to look up other modalities
desired_subject_and_event_series = pd.Series(
    zip(
        df_rows_from_original_dmri_sample['src_subject_id'],
        df_rows_from_original_dmri_sample['eventname'],
    )
)
def get_scan_type_for_sampled_interviews(df, scan_type, desired_subject_and_event=None):
    """Select the rows of ``df`` for the sampled interviews and one scan type.

    A row is kept when its ``(src_subject_id, eventname)`` pair is among the
    desired pairs and its ``scan_type`` contains ``scan_type``
    (case-insensitive; the pattern is interpreted by ``str.contains``).

    Parameters
    ----------
    df
        Table with ``src_subject_id``, ``eventname`` and ``scan_type`` columns.
    scan_type
        Pattern to look for in the ``scan_type`` column, e.g. ``'t1'``.
    desired_subject_and_event
        Collection of ``(src_subject_id, eventname)`` tuples to keep. Defaults
        to the module-level ``desired_subject_and_event_series``.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows, keeping their original index labels.
    """
    if desired_subject_and_event is None:
        desired_subject_and_event = desired_subject_and_event_series

    # Build the pairs on df's own index: boolean indexing aligns the mask by
    # label, so a fresh 0..n-1 index would select the wrong rows (or raise)
    # whenever df does not carry a default RangeIndex.
    subject_event_pairs = pd.Series(
        list(zip(df.src_subject_id, df.eventname)),
        index=df.index,
        dtype=object,
    )
    in_sample = subject_event_pairs.isin(desired_subject_and_event)

    # na=False: a missing scan_type counts as "no match" instead of leaking
    # NaN into the boolean mask.
    has_scan_type = df.scan_type.str.contains(scan_type, case=False, na=False)

    return df[in_sample & has_scan_type].copy()
# Stack the T1 rows on top of the T2 rows for the sampled interviews
t1_rows = get_scan_type_for_sampled_interviews(df_t1t2, 't1')
t2_rows = get_scan_type_for_sampled_interviews(df_t1t2, 't2')
df_sample = pd.concat([t1_rows, t2_rows], axis=0)
# %%
# Write the S3 links, one per line, as the input list for the nda-tools downloader
with open('sample_s3_links_t1t2.txt', 'w') as links_file:
    links_file.write('\n'.join(df_sample.derived_files) + '\n')

# Write a table mapping each filename (the last path component of its link)
# to its site id, so images can be grouped by site later if needed
df_sample['filename'] = df_sample.derived_files.apply(lambda link: link.rsplit('/', 1)[-1])
site_table_columns = ['filename', 'site_id_l', 'mri_info_manufacturer', 'fmriresults01_id']
df_sample[site_table_columns].to_csv('sample_site_table_t1t2.csv', index=False)