From 3cef3637ca64bd2cf4ef777b1297f929a0baf9ea Mon Sep 17 00:00:00 2001 From: HarrisonWilde Date: Tue, 17 Oct 2023 14:32:12 +0100 Subject: [PATCH] Apply black formatting to notebooks --- auxiliary/MIMIC_preproc.ipynb | 223 +++++++++++++++++++------------ auxiliary/generate_SUPPORT.ipynb | 9 +- 2 files changed, 140 insertions(+), 92 deletions(-) diff --git a/auxiliary/MIMIC_preproc.ipynb b/auxiliary/MIMIC_preproc.ipynb index 3d8db638..9125f1d5 100644 --- a/auxiliary/MIMIC_preproc.ipynb +++ b/auxiliary/MIMIC_preproc.ipynb @@ -60,8 +60,8 @@ "metadata": {}, "outputs": [], "source": [ - "mimic_path = \"../data/\" # Path containing MIMIC data\n", - "output_path = \"../data/\" # Path to save the created datasets\n", + "mimic_path = \"../data/\" # Path containing MIMIC data\n", + "output_path = \"../data/\" # Path to save the created datasets\n", "\n", "admissions = pd.read_csv(mimic_path + \"ADMISSIONS.csv\")\n", "chartevents = pd.read_csv(mimic_path + \"CHARTEVENTS.csv\", nrows=10000000)\n", @@ -113,27 +113,20 @@ " < pd.to_datetime(\"1930-01-01 00:00:00\", format=\"%Y-%m-%d %H:%M:%S\")\n", " ):\n", " num_years = randrange(years_diff_behind - 80, years_diff_behind - 40)\n", - " new_dobs.append(\n", - " pd.to_datetime(row[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\")\n", - " + pd.DateOffset(years=num_years)\n", - " )\n", + " new_dobs.append(pd.to_datetime(row[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\") + pd.DateOffset(years=num_years))\n", " dob_offset.append(num_years)\n", " elif (years_diff_ahead != 0) and (\n", " pd.to_datetime(row[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\")\n", " > pd.to_datetime(\"2021-12-01 00:00:00\", format=\"%Y-%m-%d %H:%M:%S\")\n", " ):\n", " num_years = randrange(years_diff_ahead + 30, years_diff_ahead + 50)\n", - " new_dobs.append(\n", - " pd.to_datetime(row[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\")\n", - " - pd.DateOffset(years=num_years)\n", - " )\n", + " new_dobs.append(pd.to_datetime(row[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\") - pd.DateOffset(years=num_years))\n", " dob_offset.append(-num_years)\n", " else:\n", " new_dobs.append(pd.to_datetime(row[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\"))\n", " dob_offset.append(0)\n", "patients[\"DOB\"] = new_dobs\n", - "patients[\"DOB_offset\"] = dob_offset\n", - "\n" + "patients[\"DOB_offset\"] = dob_offset" ] }, { @@ -163,39 +156,57 @@ "source": [ "seed(2021)\n", "# Generate small input data file\n", - "mimic_table = admissions[['SUBJECT_ID','ETHNICITY','ADMITTIME','DISCHTIME','DISCHARGE_LOCATION']]\n", - "mimic_table = mimic_table.merge(patients[['SUBJECT_ID','GENDER','DOB']])\n", - "mimic_table = mimic_table.merge(icustays[['SUBJECT_ID','ICUSTAY_ID','FIRST_CAREUNIT']])\n", - "one_per_pat = chartevents.drop_duplicates(subset=['SUBJECT_ID','ICUSTAY_ID'])\n", - "mimic_table = mimic_table.merge(one_per_pat[['SUBJECT_ID','ICUSTAY_ID','CHARTTIME','ITEMID','VALUE','VALUEUOM']],on=['SUBJECT_ID','ICUSTAY_ID'])\n", + "mimic_table = admissions[[\"SUBJECT_ID\", \"ETHNICITY\", \"ADMITTIME\", \"DISCHTIME\", \"DISCHARGE_LOCATION\"]]\n", + "mimic_table = mimic_table.merge(patients[[\"SUBJECT_ID\", \"GENDER\", \"DOB\"]])\n", + "mimic_table = mimic_table.merge(icustays[[\"SUBJECT_ID\", \"ICUSTAY_ID\", \"FIRST_CAREUNIT\"]])\n", + "one_per_pat = chartevents.drop_duplicates(subset=[\"SUBJECT_ID\", \"ICUSTAY_ID\"])\n", + "mimic_table = mimic_table.merge(\n", + " one_per_pat[[\"SUBJECT_ID\", \"ICUSTAY_ID\", \"CHARTTIME\", \"ITEMID\", \"VALUE\", \"VALUEUOM\"]],\n", + " on=[\"SUBJECT_ID\", \"ICUSTAY_ID\"],\n", + ")\n", "\n", "new_admits = []\n", "new_dischs = []\n", "new_chart = []\n", "\n", "for index, row in tqdm(mimic_table.iterrows(), total=mimic_table.shape[0]):\n", - " admit_min = len(pd.date_range(start=pd.to_datetime(row['DOB'],format='%Y-%m-%d %H:%M:%S'),end=pd.to_datetime('2021-12-01 00:00:00',format='%Y-%m-%d %H:%M:%S'),freq='D'))\n", - " stay_len = len(pd.date_range(start=pd.to_datetime(row['ADMITTIME'],format='%Y-%m-%d %H:%M:%S'),end=pd.to_datetime(row['DISCHTIME'],format='%Y-%m-%d %H:%M:%S'),freq='S'))\n", + " admit_min = len(\n", + " pd.date_range(\n", + " start=pd.to_datetime(row[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\"),\n", + " end=pd.to_datetime(\"2021-12-01 00:00:00\", format=\"%Y-%m-%d %H:%M:%S\"),\n", + " freq=\"D\",\n", + " )\n", + " )\n", + " stay_len = len(\n", + " pd.date_range(\n", + " start=pd.to_datetime(row[\"ADMITTIME\"], format=\"%Y-%m-%d %H:%M:%S\"),\n", + " end=pd.to_datetime(row[\"DISCHTIME\"], format=\"%Y-%m-%d %H:%M:%S\"),\n", + " freq=\"S\",\n", + " )\n", + " )\n", "\n", - " num_days_admit = randrange(np.round(admit_min*0.25).astype(int),np.round(admit_min*0.9).astype(int)+5)\n", - " num_days_disch = randrange(0,50)\n", - " num_secs_chart = randrange(np.round(stay_len*0.01).astype(int)+1,np.round(stay_len*0.99).astype(int)+10)\n", - " new_admit_date = pd.to_datetime(row['DOB'],format='%Y-%m-%d %H:%M:%S') + pd.DateOffset(days=num_days_admit)\n", + " num_days_admit = randrange(np.round(admit_min * 0.25).astype(int), np.round(admit_min * 0.9).astype(int) + 5)\n", + " num_days_disch = randrange(0, 50)\n", + " num_secs_chart = randrange(np.round(stay_len * 0.01).astype(int) + 1, np.round(stay_len * 0.99).astype(int) + 10)\n", + " new_admit_date = pd.to_datetime(row[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\") + pd.DateOffset(days=num_days_admit)\n", " new_admits.append(new_admit_date)\n", " new_dischs.append(new_admit_date + pd.DateOffset(days=num_days_disch))\n", " new_chart.append(new_admit_date + pd.DateOffset(seconds=num_secs_chart))\n", "\n", - "mimic_table['ADMITTIME'] = new_admits\n", - "mimic_table['DISCHTIME'] = new_dischs\n", - "mimic_table['CHARTTIME'] = new_chart\n", - "\n", - "mimic_table = mimic_table[(pd.to_datetime(mimic_table.ADMITTIME) < pd.to_datetime(mimic_table.CHARTTIME)) & (pd.to_datetime(mimic_table.DISCHTIME) > pd.to_datetime(mimic_table.CHARTTIME))]\n", - "mimic_table = mimic_table.merge(items[['ITEMID','LABEL']],on=['ITEMID'])\n", - "mimic_table.drop(['ICUSTAY_ID','ITEMID'],axis=1,inplace=True)\n", - "age_calc = pd.Timestamp('2021-12-01 00:00:00')\n", - "mimic_table['DOB'] = pd.to_datetime(mimic_table['DOB'], format='%Y-%m-%d %H:%M:%S')\n", - "mimic_table['age'] = (age_calc - mimic_table['DOB']).astype(' pd.to_datetime(mimic_table.CHARTTIME))\n", + "]\n", + "mimic_table = mimic_table.merge(items[[\"ITEMID\", \"LABEL\"]], on=[\"ITEMID\"])\n", + "mimic_table.drop([\"ICUSTAY_ID\", \"ITEMID\"], axis=1, inplace=True)\n", + "age_calc = pd.Timestamp(\"2021-12-01 00:00:00\")\n", + "mimic_table[\"DOB\"] = pd.to_datetime(mimic_table[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\")\n", + "mimic_table[\"age\"] = (age_calc - mimic_table[\"DOB\"]).astype(\" pd.to_datetime(mimic_table.CHARTTIME))]\n", - "mimic_table = mimic_table.merge(items[['ITEMID','LABEL']],on=['ITEMID'])\n", - "mimic_table.drop(['ICUSTAY_ID','ITEMID'],axis=1,inplace=True)\n", - "age_calc = pd.Timestamp('2021-12-01 00:00:00')\n", - "mimic_table['DOB'] = pd.to_datetime(mimic_table['DOB'], format='%Y-%m-%d %H:%M:%S')\n", - "mimic_table['age'] = (age_calc - mimic_table['DOB']).astype(' pd.to_datetime(mimic_table.CHARTTIME))\n", + "]\n", + "mimic_table = mimic_table.merge(items[[\"ITEMID\", \"LABEL\"]], on=[\"ITEMID\"])\n", + "mimic_table.drop([\"ICUSTAY_ID\", \"ITEMID\"], axis=1, inplace=True)\n", + "age_calc = pd.Timestamp(\"2021-12-01 00:00:00\")\n", + "mimic_table[\"DOB\"] = pd.to_datetime(mimic_table[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\")\n", + "mimic_table[\"age\"] = (age_calc - mimic_table[\"DOB\"]).astype(\" pd.to_datetime(mimic_table.CHARTTIME))]\n", - "mimic_table = mimic_table.merge(items[['ITEMID','LABEL']],on=['ITEMID'])\n", - "mimic_table.drop(['ICUSTAY_ID','ITEMID'],axis=1,inplace=True)\n", - "age_calc = pd.Timestamp('2021-12-01 00:00:00')\n", - "mimic_table['DOB'] = pd.to_datetime(mimic_table['DOB'], format='%Y-%m-%d %H:%M:%S')\n", - "mimic_table['age'] = (age_calc - mimic_table['DOB']).astype(' pd.to_datetime(mimic_table.CHARTTIME))\n", + "]\n", + "mimic_table = mimic_table.merge(items[[\"ITEMID\", \"LABEL\"]], on=[\"ITEMID\"])\n", + "mimic_table.drop([\"ICUSTAY_ID\", \"ITEMID\"], axis=1, inplace=True)\n", + "age_calc = pd.Timestamp(\"2021-12-01 00:00:00\")\n", + "mimic_table[\"DOB\"] = pd.to_datetime(mimic_table[\"DOB\"], format=\"%Y-%m-%d %H:%M:%S\")\n", + "mimic_table[\"age\"] = (age_calc - mimic_table[\"DOB\"]).astype(\"