diff --git a/pyproject.toml b/pyproject.toml index 78821a8e6..65da13b12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,8 +14,11 @@ select = ["E", "W"] # `test` directory. [tool.pyright] include = [ + "python/dicom_archive.py", + "python/dicom_summary.py", "python/tests", "python/lib/db", + "python/lib/dicom", "python/lib/exception", "python/lib/validate_subject_ids.py", ] diff --git a/python/dicom_archive.py b/python/dicom_archive.py new file mode 100644 index 000000000..7271b0e9e --- /dev/null +++ b/python/dicom_archive.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python + +from dataclasses import dataclass +from typing import Any, cast +import gzip +import os +import shutil +import sys +import tarfile + +from lib.db.connect import connect_to_db +import lib.dicom.dicom_database +import lib.dicom.dicom_log +import lib.dicom.summary_make +import lib.dicom.summary_write +import lib.dicom.text +import lib.exitcode +from lib.lorisgetopt import LorisGetOpt +from lib.db.model.dicom_archive import DbDicomArchive +from lib.db.query.dicom_archive import try_get_dicom_archive_with_study_uid + + +def print_error_exit(message: str, code: int): + print(f'ERROR: {message}', file=sys.stderr) + sys.exit(code) + + +def print_warning(message: str): + print(f'WARNING: {message}', file=sys.stderr) + + +@dataclass +class Args: + profile: str | None + source: str + target: str + today: bool + year: bool + overwrite: bool + db_insert: bool + db_update: bool + verbose: bool + + def __init__(self, options_dict: dict[str, Any]): + self.profile = options_dict['profile']['value'] + self.source = options_dict['source']['value'] + self.target = options_dict['target']['value'] + self.today = options_dict['today']['value'] + self.year = options_dict['year']['value'] + self.overwrite = options_dict['overwrite']['value'] + self.db_insert = options_dict['db-insert']['value'] + self.db_update = options_dict['db-update']['value'] + self.verbose = options_dict['verbose']['value'] + + +def main(): + 
def check_create_file(path: str): + if os.path.exists(path): + if args.overwrite: + print_warning(f'Overwriting \'{path}\'') + else: + print_error_exit( + ( + f'File or directory \'{path}\' already exists. ' + 'Use option \'--overwrite\' to overwrite it.' + ), + lib.exitcode.TARGET_EXISTS_NO_CLOBBER, + ) + + usage = ( + "\n" + + "********************************************************************\n" + " DICOM ARCHIVING SCRIPT\n" + "********************************************************************\n" + "The program reads a DICOM directory, processes it into a structured and " + "compressed archive, and inserts it into or uploads it to the LORIS database.\n\n" + + "usage : dicom_archive.py -p -s -t ...\n\n" + + "options: \n" + "\t-p, --profile : Name of the python database config file in dicom-archive/.loris_mri\n" + "\t-s, --source : Source directory containing the DICOM files to archive\n" + "\t-t, --target : Directory in which to place the resulting DICOM archive\n" + "\t --today : Use today's date as the scan date instead of the DICOM scan date\n" + "\t --year : Create the archive in a year subdirectory (example: 2024/DCM_2024-08-27_FooBar.tar)\n" + "\t --overwrite : Overwrite the DICOM archive file if it already exists\n" + "\t --db-insert : Insert the created DICOM archive in the database (requires the archive\n" + "\t to not be already inserted)\n" + "\t --db-update : Update the DICOM archive in the database (requires the archive to be\n" + "\t already inserted), generally used with --overwrite\n" + "\t-v, --verbose : If set, be verbose\n\n" + + "required options are: \n" + "\t--profile\n" + "\t--source\n" + "\t--target\n\n" + ) + + # NOTE: Some options do not have short options but LorisGetOpt does not support that, so we + # repeat the long names. 
+ options_dict = { + "profile": { + "value": None, "required": True, "expect_arg": True, "short_opt": "p", "is_path": False + }, + "source": { + "value": None, "required": True, "expect_arg": True, "short_opt": "s", "is_path": True, + }, + "target": { + "value": None, "required": True, "expect_arg": True, "short_opt": "t", "is_path": True, + }, + "today": { + "value": False, "required": False, "expect_arg": False, "short_opt": "today", "is_path": False, + }, + "year": { + "value": False, "required": False, "expect_arg": False, "short_opt": "year", "is_path": False, + }, + "overwrite": { + "value": False, "required": False, "expect_arg": False, "short_opt": "overwrite", "is_path": False, + }, + "db-insert": { + "value": False, "required": False, "expect_arg": False, "short_opt": "db-insert", "is_path": False, + }, + "db-update": { + "value": False, "required": False, "expect_arg": False, "short_opt": "db-update", "is_path": False, + }, + "verbose": { + "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False + }, + "help": { + "value": False, "required": False, "expect_arg": False, "short_opt": "h", "is_path": False + }, + } + + # Get the CLI arguments and connect to the database + + loris_getopt_obj = LorisGetOpt(usage, options_dict, os.path.basename(__file__[:-3])) + args = Args(loris_getopt_obj.options_dict) + + db = connect_to_db(cast(Any, loris_getopt_obj.config_info).mysql) + + # Check arguments + + if args.db_insert and args.db_update: + print_error_exit( + 'Arguments \'--db-insert\' and \'--db-update\' must not be set both at the same time.', + lib.exitcode.INVALID_ARG, + ) + + if not os.path.isdir(args.source) or not os.access(args.source, os.R_OK): + print_error_exit( + 'Argument \'--source\' must be a readable directory path.', + lib.exitcode.INVALID_ARG, + ) + + if not os.path.isdir(args.target) or not os.access(args.target, os.W_OK): + print_error_exit( + 'Argument \'--target\' must be a writable directory path.', + 
lib.exitcode.INVALID_ARG, + ) + + # Check paths + + base_name = os.path.basename(args.source) + + tar_path = f'{args.target}/{base_name}.tar' + zip_path = f'{args.target}/{base_name}.tar.gz' + summary_path = f'{args.target}/{base_name}.meta' + log_path = f'{args.target}/{base_name}.log' + + check_create_file(tar_path) + check_create_file(zip_path) + check_create_file(summary_path) + check_create_file(log_path) + + print('Extracting DICOM information (may take a long time)') + + summary = lib.dicom.summary_make.make(args.source, args.verbose) + + print('Checking database presence') + + db_archive = try_get_dicom_archive_with_study_uid(db, summary.info.study_uid) + + if args.db_insert and db_archive is not None: + print_error_exit( + ( + f'Study \'{summary.info.study_uid}\' is already inserted in the database\n' + 'Previous archiving log:\n' + f'{db_archive.create_info}' + ), + lib.exitcode.INSERT_FAILURE, + ) + + if args.db_update and db_archive is None: + print_error_exit( + f'No study \'{summary.info.study_uid}\' found in the database', + lib.exitcode.UPDATE_FAILURE, + ) + + print('Copying into DICOM tar') + + with tarfile.open(tar_path, 'w') as tar: + for file in os.listdir(args.source): + tar.add(args.source + '/' + file) + + print('Calculating DICOM tar MD5 sum') + + tarball_md5_sum = lib.dicom.text.make_hash(tar_path, True) + + print('Zipping DICOM tar (may take a long time)') + + with open(tar_path, 'rb') as tar: + # 6 is the default compression level of the tar command, Python's + # default is 9, which is more powerful but also too slow. 
+ with gzip.open(zip_path, 'wb', compresslevel=6) as zip: + shutil.copyfileobj(tar, zip) + + print('Calculating DICOM zip MD5 sum') + + zipball_md5_sum = lib.dicom.text.make_hash(zip_path, True) + + print('Getting DICOM scan date') + + if not args.today and summary.info.scan_date is None: + print_warning(( + 'No scan date was found in the DICOMs, ' + 'consider using argument \'--today\' to use today\'s date as the scan date.' + )) + + if args.year and summary.info.scan_date is None: + print_warning(( + 'Argument \'--year\' was provided but no scan date was found in the DICOMs, ' + 'the argument will be ignored.' + )) + + if args.year and summary.info.scan_date is not None: + dir_path = f'{args.target}/{summary.info.scan_date.year}' + if not os.path.exists(dir_path): + print(f'Creating directory \'{dir_path}\'') + os.mkdir(dir_path) + elif not os.path.isdir(dir_path) or not os.access(dir_path, os.W_OK): + print_error_exit( + f'Path \'{dir_path}\' exists but is not a writable directory.', + lib.exitcode.CREATE_DIR_FAILURE, + ) + else: + dir_path = args.target + + if summary.info.scan_date is not None: + scan_date_string = lib.dicom.text.write_date(summary.info.scan_date) + archive_path = f'{dir_path}/DCM_{scan_date_string}_{base_name}.tar' + else: + archive_path = f'{dir_path}/DCM_{base_name}.tar' + + check_create_file(archive_path) + + log = lib.dicom.dicom_log.make(args.source, archive_path, tarball_md5_sum, zipball_md5_sum) + + if args.verbose: + print('The archive will be created with the following arguments:') + print(lib.dicom.dicom_log.write_to_string(log)) + + print('Writing summary file') + + lib.dicom.summary_write.write_to_file(summary_path, summary) + + print('Writing log file') + + lib.dicom.dicom_log.write_to_file(log_path, log) + + print('Copying into DICOM archive') + + with tarfile.open(archive_path, 'w') as tar: + tar.add(zip_path, os.path.basename(zip_path)) + tar.add(summary_path, os.path.basename(summary_path)) + tar.add(log_path, 
os.path.basename(log_path)) + + print('Removing temporary files') + + os.remove(tar_path) + os.remove(zip_path) + os.remove(summary_path) + os.remove(log_path) + + print('Calculating DICOM tar MD5 sum') + + log.archive_md5_sum = lib.dicom.text.make_hash(log.target_path, True) + + if args.db_insert: + lib.dicom.dicom_database.insert(db, log, summary) + + if args.db_update: + # Safe because we checked previously that the DICOM archive is not `None` + db_archive = cast(DbDicomArchive, db_archive) + lib.dicom.dicom_database.update(db, db_archive, log, summary) + + print('Success') + + +if __name__ == "__main__": + main() diff --git a/python/dicom_summary.py b/python/dicom_summary.py new file mode 100644 index 000000000..b1abede34 --- /dev/null +++ b/python/dicom_summary.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +import argparse +from dataclasses import dataclass +import sys +import traceback + +import lib.dicom.summary_make +import lib.dicom.summary_write +import lib.exitcode + +parser = argparse.ArgumentParser(description=( + 'Read a DICOM directory and print the DICOM summary of this directory ' + 'in the the console.' 
+ )) + +parser.add_argument( + 'directory', + help='The DICOM directory') + +parser.add_argument( + '--verbose', + action='store_true', + help='Set the script to be verbose') + + +@dataclass +class Args: + directory: str + verbose: bool + + +def main(): + parsed_args = parser.parse_args() + args = Args(parsed_args.directory, parsed_args.verbose) + + try: + summary = lib.dicom.summary_make.make(args.directory, args.verbose) + except Exception as e: + print(f'ERROR: Cannot create a summary for the directory \'{args.directory}\'.', file=sys.stderr) + print('Exception message:', file=sys.stderr) + print(e, file=sys.stderr) + traceback.print_exc(file=sys.stderr) + exit(lib.exitcode.INVALID_DICOM) + + print(lib.dicom.summary_write.write_to_string(summary)) + + +if __name__ == "__main__": + main() diff --git a/python/lib/db/model/dicom_archive_series.py b/python/lib/db/model/dicom_archive_series.py index eb8385834..6f903f7b4 100644 --- a/python/lib/db/model/dicom_archive_series.py +++ b/python/lib/db/model/dicom_archive_series.py @@ -1,6 +1,6 @@ from typing import List, Optional -from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy import ForeignKey +from sqlalchemy.orm import Mapped, mapped_column, relationship from lib.db.base import Base import lib.db.model.dicom_archive as db_dicom_archive import lib.db.model.dicom_archive_file as db_dicom_archive_file diff --git a/python/lib/db/model/mri_upload.py b/python/lib/db/model/mri_upload.py index 89c3410f2..ac0080c64 100644 --- a/python/lib/db/model/mri_upload.py +++ b/python/lib/db/model/mri_upload.py @@ -22,7 +22,7 @@ class DbMriUpload(Base): dicom_archive_id : Mapped[Optional[int]] \ = mapped_column('TarchiveID', ForeignKey('tarchive.TarchiveID')) dicom_archive : Mapped[Optional['db_dicom_archive.DbDicomArchive']] \ - = relationship('DicomArchive', back_populates='upload') + = relationship('DbDicomArchive', back_populates='upload') session_id : Mapped[Optional[int]] = mapped_column('SessionID') 
is_candidate_info_validated : Mapped[Optional[bool]] = mapped_column('IsCandidateInfoValidated') is_dicom_archive_validated : Mapped[bool] = mapped_column('IsTarchiveValidated') diff --git a/python/lib/db/query/dicom_archive.py b/python/lib/db/query/dicom_archive.py new file mode 100644 index 000000000..41885e7ef --- /dev/null +++ b/python/lib/db/query/dicom_archive.py @@ -0,0 +1,51 @@ +from typing import Optional +from sqlalchemy import delete, select +from sqlalchemy.orm import Session as Database + +from lib.db.model.dicom_archive import DbDicomArchive +from lib.db.model.dicom_archive_file import DbDicomArchiveFile +from lib.db.model.dicom_archive_series import DbDicomArchiveSeries + + +def try_get_dicom_archive_with_study_uid(db: Database, study_uid: str): + """ + Get a DICOM archive from the database using its study UID, or return `None` if no DICOM + archive is found. + """ + + query = select(DbDicomArchive).where(DbDicomArchive.study_uid == study_uid) + return db.execute(query).scalar_one_or_none() + + +def delete_dicom_archive_file_series(db: Database, dicom_archive: DbDicomArchive): + """ + Delete from the database all the DICOM archive files and series associated with a DICOM + archive. + """ + + db.execute(delete(DbDicomArchiveFile) + .where(DbDicomArchiveFile.archive_id == dicom_archive.id)) + + db.execute(delete(DbDicomArchiveSeries) + .where(DbDicomArchiveSeries.archive_id == dicom_archive.id)) + + +def get_dicom_archive_series_with_file_info( + db: Database, + series_uid: str, + series_number: int, + echo_time: Optional[float], + sequence_name: Optional[str], +): + """ + Get a DICOM archive series from the database using its file information, or raise an + exception if no DICOM archive series is found. 
+ """ + + query = select(DbDicomArchiveSeries) \ + .where(DbDicomArchiveSeries.series_uid == series_uid) \ + .where(DbDicomArchiveSeries.series_number == series_number) \ + .where(DbDicomArchiveSeries.echo_time == echo_time) \ + .where(DbDicomArchiveSeries.sequence_name == sequence_name) + + return db.execute(query).scalar_one() diff --git a/python/lib/dicom/dicom_database.py b/python/lib/dicom/dicom_database.py new file mode 100644 index 000000000..8367962ec --- /dev/null +++ b/python/lib/dicom/dicom_database.py @@ -0,0 +1,135 @@ +from datetime import datetime +from sqlalchemy.orm import Session as Database +from lib.db.model.dicom_archive import DbDicomArchive +from lib.db.model.dicom_archive_file import DbDicomArchiveFile +from lib.db.model.dicom_archive_series import DbDicomArchiveSeries +from lib.db.query.dicom_archive import delete_dicom_archive_file_series, get_dicom_archive_series_with_file_info +from lib.dicom.summary_type import Summary +from lib.dicom.dicom_log import DicomArchiveLog +import lib.dicom.text +import lib.dicom.summary_write +import lib.dicom.dicom_log + + +def populate_dicom_archive( + dicom_archive: DbDicomArchive, + log: DicomArchiveLog, + summary: Summary, + archive_path: str, + session_id: int | None, +): + """ + Populate a DICOM archive with information from its DICOM archiving log and DICOM summary. + + :param dicom_archive: The DICOM archive ORM object to populate. + :param log: The DICOM archiving log object. + :param summary: The DICOM summary object. + :param archive_path: The location of the archive file to record on the DICOM archive. + :param session_id: The optional session ID associated with the DICOM archive. 
+ """ + + dicom_archive.study_uid = summary.info.study_uid + dicom_archive.patient_id = summary.info.patient.id + dicom_archive.patient_name = summary.info.patient.name + dicom_archive.patient_birthdate = summary.info.patient.birth_date + dicom_archive.patient_sex = summary.info.patient.sex + dicom_archive.neuro_db_center_name = None + dicom_archive.center_name = summary.info.institution or '' + dicom_archive.last_update = None + dicom_archive.date_acquired = summary.info.scan_date + dicom_archive.date_last_archived = datetime.now() + dicom_archive.acquisition_count = len(summary.acquis) + dicom_archive.dicom_file_count = len(summary.dicom_files) + dicom_archive.non_dicom_file_count = len(summary.other_files) + dicom_archive.md5_sum_dicom_only = log.tarball_md5_sum + dicom_archive.md5_sum_archive = log.archive_md5_sum + dicom_archive.creating_user = log.creator_name + dicom_archive.sum_type_version = log.summary_version + dicom_archive.tar_type_version = log.archive_version + dicom_archive.source_location = log.source_path + dicom_archive.archive_location = archive_path + dicom_archive.scanner_manufacturer = summary.info.scanner.manufacturer + dicom_archive.scanner_model = summary.info.scanner.model + dicom_archive.scanner_serial_number = summary.info.scanner.serial_number + dicom_archive.scanner_software_version = summary.info.scanner.software_version + dicom_archive.session_id = session_id + dicom_archive.upload_attempt = 0 + dicom_archive.create_info = lib.dicom.dicom_log.write_to_string(log) + dicom_archive.acquisition_metadata = lib.dicom.summary_write.write_to_string(summary) + dicom_archive.date_sent = None + dicom_archive.pending_transfer = 0 + + +def insert(db: Database, log: DicomArchiveLog, summary: Summary): + """ + Insert a DICOM archive into the database. + + :param db: The database. + :param log: The archiving log of the DICOM archive. + :param summary: The summary of the DICOM archive. 
+ """ + + dicom_archive = DbDicomArchive() + populate_dicom_archive(dicom_archive, log, summary, 'TODO', None) + dicom_archive.date_first_archived = datetime.now() + db.add(dicom_archive) + insert_files_series(db, dicom_archive, summary) + return dicom_archive + + + def insert_files_series(db: Database, dicom_archive: DbDicomArchive, summary: Summary): + for acqui in summary.acquis: + db.add(DbDicomArchiveSeries( + archive_id = dicom_archive.id, + series_number = acqui.series_number, + series_description = acqui.series_description, + sequence_name = acqui.sequence_name, + echo_time = acqui.echo_time, + repetition_time = acqui.repetition_time, + inversion_time = acqui.inversion_time, + slice_thickness = acqui.slice_thickness, + phase_encoding = acqui.phase_encoding, + number_of_files = acqui.number_of_files, + series_uid = acqui.series_uid, + modality = acqui.modality, + )) + + for file in summary.dicom_files: + series = get_dicom_archive_series_with_file_info( + db, + file.series_uid or '', + file.series_number or 1, + file.echo_time, + file.sequence_name or '', + ) + + db.add(DbDicomArchiveFile( + archive_id = dicom_archive.id, + series_number = file.series_number, + file_number = file.file_number, + echo_number = file.echo_number, + series_description = file.series_description, + md5_sum = file.md5_sum, + file_name = file.file_name, + series_id = series.id, + )) + + + def update(db: Database, dicom_archive: DbDicomArchive, log: DicomArchiveLog, summary: Summary): + """ + Update a DICOM archive in the database. + + :param db: The database. + :param dicom_archive: The DICOM archive to update. + :param log: The archiving log of the DICOM archive. + :param summary: The summary of the DICOM archive. + """ + + # Delete the associated database DICOM files and series. + delete_dicom_archive_file_series(db, dicom_archive) + + # Update the database record with the new DICOM information. 
+ populate_dicom_archive(dicom_archive, log, summary, 'TODO', None) + + # Insert the new DICOM files and series. + insert_files_series(db, dicom_archive, summary) diff --git a/python/lib/dicom/dicom_log.py b/python/lib/dicom/dicom_log.py new file mode 100644 index 000000000..37fc18503 --- /dev/null +++ b/python/lib/dicom/dicom_log.py @@ -0,0 +1,73 @@ +from dataclasses import dataclass +from datetime import datetime +import os +import socket +from lib.dicom.text_dict import DictWriter + + +@dataclass +class DicomArchiveLog: + """ + DICOM archiving log object, containg information about the archiving of a + DICOM directory. + """ + + source_path: str + target_path: str + creator_host: str + creator_os: str + creator_name: str + archive_date: str + summary_version: int + archive_version: int + tarball_md5_sum: str + zipball_md5_sum: str + archive_md5_sum: str + + +def write_to_string(log: DicomArchiveLog): + """ + Serialize a DICOM archiving log object into a string. + """ + return DictWriter([ + ('Taken from dir' , log.source_path), + ('Archive target location' , log.target_path), + ('Name of creating host' , log.creator_host), + ('Name of host OS' , log.creator_os), + ('Created by user' , log.creator_name), + ('Archived on' , log.archive_date), + ('dicomSummary version' , log.summary_version), + ('dicomTar version' , log.archive_version), + ('md5sum for DICOM tarball' , log.tarball_md5_sum), + ('md5sum for DICOM tarball gzipped' , log.zipball_md5_sum), + ('md5sum for complete archive' , log.archive_md5_sum), + ]).write() + + +def write_to_file(file_path: str, log: DicomArchiveLog): + """ + Serialize a DICOM archiving log object into a text file. + """ + string = write_to_string(log) + with open(file_path, 'w') as file: + file.write(string) + + +def make(source: str, target: str, tarball_md5_sum: str, zipball_md5_sum: str): + """ + Create a DICOM archiving log object from the provided arguments on a DICOM + directory, as well as the current execution environment. 
+ """ + return DicomArchiveLog( + source, + target, + socket.gethostname(), + os.uname().sysname, + os.environ['USER'], + datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S'), + 2, + 2, + tarball_md5_sum, + zipball_md5_sum, + 'Provided in database only', + ) diff --git a/python/lib/dicom/summary_make.py b/python/lib/dicom/summary_make.py new file mode 100644 index 000000000..60f289eaf --- /dev/null +++ b/python/lib/dicom/summary_make.py @@ -0,0 +1,216 @@ +from functools import cmp_to_key +import os +import pydicom +import pydicom.errors +from lib.dicom.summary_type import Summary, Info, Patient, Scanner, Acquisition, DicomFile, OtherFile +from lib.dicom.text import make_hash, read_dicom_date_none +from lib.utilities import get_all_files + + +def get_value(dicom: pydicom.Dataset, tag: str): + """ + Get a required value from a DICOM. + """ + + if tag not in dicom: + raise Exception(f'Expected DICOM tag \'{tag}\' but found none.') + + return dicom[tag].value + + +def get_value_none(dicom: pydicom.Dataset, tag: str): + """ + Get a nullable value from a DICOM. + """ + + if tag not in dicom: + return None + + return dicom[tag].value or None + + +def cmp_int_none(a: int | None, b: int | None): + """ + Order comparison between two nullable integers. + The returned value is in accordance with `functools.cmp_to_key`. + https://docs.python.org/3/library/functools.html#functools.cmp_to_key + """ + + match a, b: + case None, None: + return 0 + case _, None: + return -1 + case None, _: + return 1 + case a, b: + return a - b + + +def cmp_string_none(a: str | None, b: str | None): + """ + Order comparison between two nullable strings. + The returned value is in accordance with `functools.cmp_to_key`. 
+ https://docs.python.org/3/library/functools.html#functools.cmp_to_key + """ + + match a, b: + case None, None: + return 0 + case _, None: + return -1 + case None, _: + return 1 + case a, b if a < b: + return -1 + case a, b if a > b: + return 1 + case a, b: + return 0 + + +def cmp_files(a: DicomFile, b: DicomFile): + """ + Compare the order of two files to sort them in the summary. + """ + + return \ + cmp_int_none(a.series_number, b.series_number) or \ + cmp_int_none(a.file_number, b.file_number) or \ + cmp_int_none(a.echo_number, b.echo_number) + + +def cmp_acquis(a: Acquisition, b: Acquisition): + """ + Compare the order of two acquisitions to sort them in the summary. + """ + + return \ + a.series_number - b.series_number or \ + cmp_string_none(a.sequence_name, b.sequence_name) + + +def make(dir_path: str, verbose: bool): + """ + Create a DICOM summary object from a DICOM directory path. + """ + + info = None + dicom_files: list[DicomFile] = [] + other_files: list[OtherFile] = [] + acquis_dict: dict[tuple[int, int | None, str | None], Acquisition] = dict() + + file_paths = get_all_files(dir_path) + for i, file_path in enumerate(file_paths): + if verbose: + print(f'Processing file \'{file_path}\' ({i + 1}/{len(file_paths)})') + + try: + dicom = pydicom.dcmread(dir_path + '/' + file_path) # type: ignore + if info is None: + info = make_info(dicom) + + dicom_files.append(make_dicom_file(dicom)) + + series = dicom.SeriesNumber + echo = get_value_none(dicom, 'EchoNumbers') + sequence = get_value_none(dicom, 'SequenceName') + + if (series, sequence, echo) not in acquis_dict: + acquis_dict[(series, sequence, echo)] = make_acqui(dicom) + + acquis_dict[(series, sequence, echo)].number_of_files += 1 + except pydicom.errors.InvalidDicomError: + other_files.append(make_other_file(dir_path + '/' + file_path)) + + if info is None: + raise Exception('Found no DICOM file in the directory.') + + acquis = list(acquis_dict.values()) + + dicom_files = sorted(dicom_files, 
key=cmp_to_key(cmp_files)) + acquis = sorted(acquis, key=cmp_to_key(cmp_acquis)) + + return Summary(info, acquis, dicom_files, other_files) + + +def make_info(dicom: pydicom.Dataset): + """ + Create an `Info` object from a DICOM file, containing general information + about the DICOM directory. + """ + + birth_date = read_dicom_date_none(get_value_none(dicom, 'PatientBirthDate')) + scan_date = read_dicom_date_none(get_value_none(dicom, 'StudyDate')) + + patient = Patient( + get_value(dicom, 'PatientID'), + get_value(dicom, 'PatientName'), + get_value_none(dicom, 'PatientSex'), + birth_date, + ) + + scanner = Scanner( + get_value(dicom, 'Manufacturer'), + get_value(dicom, 'ManufacturerModelName'), + get_value(dicom, 'DeviceSerialNumber'), + get_value(dicom, 'SoftwareVersions'), + ) + + return Info( + get_value(dicom, 'StudyInstanceUID'), + patient, + scanner, + scan_date, + get_value_none(dicom, 'InstitutionName'), + get_value(dicom, 'Modality'), + ) + + +def make_dicom_file(dicom: pydicom.Dataset): + """ + Create a `DicomFile` object from a DICOM file, containing information about + this DICOM file. + """ + return DicomFile( + os.path.basename(dicom.filename), + make_hash(dicom.filename), + get_value_none(dicom, 'SeriesNumber'), + get_value_none(dicom, 'SeriesInstanceUID'), + get_value_none(dicom, 'SeriesDescription'), + get_value_none(dicom, 'InstanceNumber'), + get_value_none(dicom, 'EchoNumbers'), + get_value_none(dicom, 'EchoTime'), + get_value_none(dicom, 'SequenceName'), + ) + + +def make_other_file(file_path: str): + """ + Create an `OtherFile` object from a non-DICOM file, containing information + about this file. + """ + return OtherFile( + os.path.basename(file_path), + make_hash(file_path), + ) + + +def make_acqui(dicom: pydicom.Dataset): + """ + Create an `Acquisition` object from a DICOM file, containg information + about a DICOM series. 
+ """ + return Acquisition( + get_value(dicom, 'SeriesNumber'), + get_value_none(dicom, 'SeriesInstanceUID'), + get_value_none(dicom, 'SeriesDescription'), + get_value_none(dicom, 'SequenceName'), + get_value_none(dicom, 'EchoTime'), + get_value_none(dicom, 'RepetitionTime'), + get_value_none(dicom, 'InversionTime'), + get_value_none(dicom, 'SliceThickness'), + get_value_none(dicom, 'InPlanePhaseEncodingDirection'), + 0, + get_value_none(dicom, 'Modality'), + ) diff --git a/python/lib/dicom/summary_type.py b/python/lib/dicom/summary_type.py new file mode 100644 index 000000000..6bf724b8e --- /dev/null +++ b/python/lib/dicom/summary_type.py @@ -0,0 +1,101 @@ +from dataclasses import dataclass +from datetime import date + + +@dataclass +class Patient: + """ + DICOM patient object, which contains information about a DICOM patient. + """ + + id: str + name: str + sex: str | None + birth_date: date | None + + +@dataclass +class Scanner: + """ + DICOM scanner object, which contains information about a DICOM scanner. + """ + + manufacturer: str + model: str + serial_number: str + software_version: str + + +@dataclass +class Info: + """ + General DICOM information object, which contains general information about + a DICOM directory. + """ + + study_uid: str + patient: Patient + scanner: Scanner + scan_date: date | None + institution: str | None + modality: str + + +@dataclass +class DicomFile: + """ + DICOM file object, which contains information about a DICOM file inside a + DICOM directory. + """ + + file_name: str + md5_sum: str + series_number: int | None + series_uid: str | None + series_description: str | None + file_number: int | None + echo_number: int | None + echo_time: float | None + sequence_name: str | None + + +@dataclass +class OtherFile: + """ + Non-DICOM file object, which contains information about a non-DICOM file + inside a DICOM directory. 
+ """ + + file_name: str + md5_sum: str + + +@dataclass +class Acquisition: + """ + DICOM acquisition object, which contains information about a DICOM series. + """ + + series_number: int + series_uid: str | None + series_description: str | None + sequence_name: str | None + echo_time: float | None # In Milliseconds + repetition_time: float | None # In Milliseconds + inversion_time: float | None # In Milliseconds + slice_thickness: float | None # In Millimeters + phase_encoding: str | None + number_of_files: int + modality: str | None + + +@dataclass +class Summary: + """ + DICOM summary object, which contains information about a DICOM directory. + """ + + info: Info + acquis: list[Acquisition] + dicom_files: list[DicomFile] + other_files: list[OtherFile] diff --git a/python/lib/dicom/summary_write.py b/python/lib/dicom/summary_write.py new file mode 100644 index 000000000..d6ba6935b --- /dev/null +++ b/python/lib/dicom/summary_write.py @@ -0,0 +1,130 @@ +import xml.etree.ElementTree as ET +from lib.dicom.summary_type import Summary, Info, Acquisition, DicomFile, OtherFile +from lib.dicom.text_dict import DictWriter +from lib.dicom.text_table import TableWriter +from lib.dicom.text import write_date_none + + +def write_to_file(filename: str, summary: Summary): + """ + Serialize a DICOM summary object into a text file. + """ + string = write_to_string(summary) + with open(filename, 'w') as file: + file.write(string) + + +def write_to_string(summary: Summary) -> str: + """ + Serialize a DICOM summary object into a string. 
+ """ + return ET.tostring(write_xml(summary), encoding='unicode') + '\n' + + +def write_xml(summary: Summary): + study = ET.Element('STUDY') + ET.SubElement(study, 'STUDY_INFO').text = write_info(summary.info) + ET.SubElement(study, 'FILES').text = write_dicom_files_table(summary.dicom_files) + ET.SubElement(study, 'OTHERS').text = write_other_files_table(summary.other_files) + ET.SubElement(study, 'ACQUISITIONS').text = write_acquis_table(summary.acquis) + ET.SubElement(study, 'SUMMARY').text = write_ending(summary) + ET.indent(study, space='') + return study + + +def write_info(info: Info): + return '\n' + DictWriter([ + ('Unique Study ID' , info.study_uid), + ('Patient Name' , info.patient.name), + ('Patient ID' , info.patient.id), + ('Patient date of birth' , write_date_none(info.patient.birth_date)), + ('Patient Sex' , info.patient.sex), + ('Scan Date' , write_date_none(info.scan_date)), + ('Scanner Manufacturer' , info.scanner.manufacturer), + ('Scanner Model Name' , info.scanner.model), + ('Scanner Serial Number' , info.scanner.serial_number), + ('Scanner Software Version' , info.scanner.software_version), + ('Institution Name' , info.institution), + ('Modality' , info.modality), + ]).write() + + +def write_dicom_files_table(files: list[DicomFile]): + writer = TableWriter() + writer.append_row(['SN', 'FN', 'EN', 'Series', 'md5sum', 'File name']) + for file in files: + writer.append_row([ + file.series_number, + file.file_number, + file.echo_number, + file.series_description, + file.md5_sum, + file.file_name, + ]) + + return '\n' + writer.write() + + +def write_other_files_table(files: list[OtherFile]): + writer = TableWriter() + writer.append_row(['md5sum', 'File name']) + for file in files: + writer.append_row([ + file.md5_sum, + file.file_name, + ]) + + return '\n' + writer.write() + + +def write_acquis_table(acquis: list[Acquisition]): + writer = TableWriter() + writer.append_row([ + 'Series (SN)', + 'Name of series', + 'Seq Name', + 'echoT ms', + 'repT 
ms', + 'invT ms', + 'sth mm', + 'PhEnc', + 'NoF', + 'Series UID', + 'Mod' + ]) + + for acqui in acquis: + writer.append_row([ + acqui.series_number, + acqui.series_description, + acqui.sequence_name, + acqui.echo_time, + acqui.repetition_time, + acqui.inversion_time, + acqui.slice_thickness, + acqui.phase_encoding, + acqui.number_of_files, + acqui.series_uid, + acqui.modality, + ]) + + return '\n' + writer.write() + + +def write_ending(summary: Summary): + birth_date = summary.info.patient.birth_date + scan_date = summary.info.scan_date + + if birth_date and scan_date: + years = scan_date.year - birth_date.year + months = scan_date.month - birth_date.month + days = scan_date.day - birth_date.day + total = round(years + months / 12 + days / 365.0, 2) + age = f'{total} or {years} years, {months} months {days} days' + else: + age = '' + + return '\n' + DictWriter([ + ('Total number of files', len(summary.dicom_files) + len(summary.other_files)), + ('Age at scan', age), + ]).write() diff --git a/python/lib/dicom/text.py b/python/lib/dicom/text.py new file mode 100644 index 000000000..9d2269008 --- /dev/null +++ b/python/lib/dicom/text.py @@ -0,0 +1,79 @@ +""" +A bunch of functions to convert values between (possibly empty) strings and +different types of values. 
+""" + +from datetime import datetime, date +import hashlib +import os + + +def write_value(value: str | int | float | None): + if value is None: + return '' + + return str(value) + + +def write_datetime(datetime: datetime): + return datetime.strftime('%Y-%m-%d %H:%M:%S') + + +def write_date(date: date): + return date.strftime('%Y-%m-%d') + + +def write_date_none(date: date | None): + if date is None: + return None + + return write_date(date) + + +def read_none(string: str): + if string == '': + return None + + return string + + +def read_date_none(string: str | None): + if string is None: + return None + + return datetime.strptime(string, '%Y-%m-%d').date() + + +def read_dicom_date_none(string: str | None): + if string is None: + return None + + return datetime.strptime(string, '%Y%m%d').date() + + +def read_int_none(string: str | None): + if string is None: + return None + + return int(string) + + +def read_float_none(string: str | None): + if string is None: + return None + + return float(string) + + +def make_hash(path: str, with_name: bool = False): + """ + Get the MD5 sum hash of a file, with or without the filename appended. + """ + + with open(path, 'rb') as file: + hash = hashlib.md5(file.read()).hexdigest() + + if with_name: + hash = f'{hash} {os.path.basename(path)}' + + return hash diff --git a/python/lib/dicom/text_dict.py b/python/lib/dicom/text_dict.py new file mode 100644 index 000000000..ac8fc08d7 --- /dev/null +++ b/python/lib/dicom/text_dict.py @@ -0,0 +1,43 @@ +from lib.dicom.text import write_value + + +class DictWriter: + """ + Writer for a text dictionary, i.e, a text of the form: + + Key 1 : Value 1 + Key 2 : Value 2 + ... 
+ """ + + def __init__(self, entries: list[tuple[str, str | int | float | None]]): + self.entries = entries + + def get_keys_length(self): + """ + Get the maximal length of the keys, used for padding + """ + length = 0 + for entry in self.entries: + key = entry[0] + if len(key) > length: + length = len(key) + + return length + + def write(self): + """ + Serialize the text dictionary into a string + """ + + if not self.entries: + return '\n' + + length = self.get_keys_length() + + entries = map( + lambda entry: f'* {entry[0].ljust(length)} : {write_value(entry[1])}\n', + self.entries, + ) + + return ''.join(entries) diff --git a/python/lib/dicom/text_table.py b/python/lib/dicom/text_table.py new file mode 100644 index 000000000..0c3109ca8 --- /dev/null +++ b/python/lib/dicom/text_table.py @@ -0,0 +1,52 @@ +from lib.dicom.text import write_value + + +class TableWriter: + """ + Writer for a text table, i.e, a table of the form: + + Field 1 | Field 2 | Field 3 + Value 1 | Value 2 | Value 3 + Value 4 | Value 5 | Value 6 + ... + """ + + rows: list[list[str]] + + def __init__(self): + self.rows = [] + + def get_cells_lengths(self): + """ + Get the longest value length of each column, used for padding + """ + + lengths = [0] * len(self.rows[0]) + for row in self.rows: + for i in range(len(row)): + if len(row[i]) > lengths[i]: + lengths[i] = len(row[i]) + + return lengths + + def append_row(self, cells: list[str | int | float | None]): + """ + Add a row to the table, which can be either the header or some values. + """ + + self.rows.append(list(map(write_value, cells))) + + def write(self): + """ + Serialize the text table into a string. 
+ """ + + if not self.rows: + return '\n' + + lengths = self.get_cells_lengths() + + rows = map(lambda row: list(map(lambda cell, length: cell.ljust(length), row, lengths)), self.rows) + rows = map(lambda row: ' | '.join(row).rstrip() + '\n', rows) + + return ''.join(rows) diff --git a/python/lib/utilities.py b/python/lib/utilities.py index 9d1bd9c92..b8d6e0d73 100755 --- a/python/lib/utilities.py +++ b/python/lib/utilities.py @@ -85,6 +85,25 @@ def append_to_tsv_file(new_tsv_file, old_tsv_file, key_value_check, verbose): writer.writerow(data) +def get_all_files(dir: str) -> list[str]: + """ + Recursively get the all the files inside a given directory, without including the directories + themselves. The returned paths are relative to the given directory. + """ + + def get_all_files_rec(dir: str, path: str): + if os.path.isdir(dir + '/' + path): + files = [] + for file in os.listdir(dir + '/' + path): + files += get_all_files_rec(dir, path + '/' + file) + + return files + + return [path] + + return get_all_files_rec(dir, '') + + def copy_file(file_orig, file_copy, verbose): """ Copies a file to a new location. 
from dataclasses import dataclass
from sqlalchemy import select
from sqlalchemy.orm import Session as Database
import pytest

from lib.db.model.dicom_archive import DbDicomArchive
from lib.db.model.dicom_archive_file import DbDicomArchiveFile
from lib.db.model.dicom_archive_series import DbDicomArchiveSeries
from lib.db.query.dicom_archive import (
    delete_dicom_archive_file_series,
    get_dicom_archive_series_with_file_info, try_get_dicom_archive_with_study_uid
)
from tests.util.database import create_test_database


@dataclass
class Setup:
    # Handles to the database and to the fixture rows the tests assert against.
    db: Database
    dicom_archive: DbDicomArchive
    dicom_archive_series: DbDicomArchiveSeries


# Field values shared by both fixture archives.
_COMMON_ARCHIVE_FIELDS = dict(
    center_name              = 'Test center',
    non_dicom_file_count     = 0,
    creating_user            = 'admin',
    sum_type_version         = 2,
    tar_type_version         = 2,
    scanner_manufacturer     = 'Test scanner manufacturer',
    scanner_model            = 'Test scanner model',
    scanner_serial_number    = 'Test scanner serial number',
    scanner_software_version = 'Test scanner software version',
    upload_attempt           = 0,
    acquisition_metadata     = '',
    pending_transfer         = 0,
)


@pytest.fixture
def setup():
    """Populate a fresh test database with two archives, three series and three files."""

    db = create_test_database()

    archive_1 = DbDicomArchive(
        study_uid         = '1.2.256.100000.1.2.3.456789',
        patient_id        = 'DCC001_111111_V1',
        patient_name      = 'DCC001_111111_V1',
        acquisition_count = 2,
        dicom_file_count  = 2,
        source_location   = '/tests/DCC001_111111_V1',
        **_COMMON_ARCHIVE_FIELDS,
    )

    archive_2 = DbDicomArchive(
        study_uid         = '2.16.999.1.2.3.456789',
        patient_id        = 'DCC002_222222_V2',
        patient_name      = 'DCC002_222222_V2',
        acquisition_count = 1,
        dicom_file_count  = 1,
        source_location   = '/test/DCC002_222222_V2',
        **_COMMON_ARCHIVE_FIELDS,
    )

    db.add_all([archive_1, archive_2])
    db.flush()

    series_1_1 = DbDicomArchiveSeries(
        archive_id      = archive_1.id,
        series_number   = 1,
        sequence_name   = 'ep_b100',
        echo_time       = 100,
        number_of_files = 1,
        series_uid      = '1.3.12.2.11.11.11.999.0.0',
        modality        = 'MR',
    )

    series_1_2 = DbDicomArchiveSeries(
        archive_id      = archive_1.id,
        series_number   = 2,
        sequence_name   = 'ep_b200',
        echo_time       = 200,
        number_of_files = 1,
        series_uid      = '1.3.12.2.11.11.11.999.0.0',
        modality        = 'MR',
    )

    series_2_1 = DbDicomArchiveSeries(
        archive_id      = archive_2.id,
        series_number   = 1,
        sequence_name   = 'ep_b100',
        echo_time       = 100,
        number_of_files = 1,
        series_uid      = '1.3.12.2.99.99.99.1111.0.0',
        modality        = 'MR',
    )

    db.add_all([series_1_1, series_1_2, series_2_1])
    db.flush()

    def add_file(archive: DbDicomArchive, series: DbDicomArchiveSeries, file_name: str):
        # All fixture files share the same (dummy) MD5 sum.
        db.add(DbDicomArchiveFile(
            archive_id = archive.id,
            series_id  = series.id,
            md5_sum    = '01234567890abcdef0123456789abcde',
            file_name  = file_name,
        ))

    add_file(archive_1, series_1_1, '1.1.dcm')
    add_file(archive_1, series_1_2, '1.2.dcm')
    add_file(archive_2, series_2_1, '2.1.dcm')
    db.flush()

    return Setup(db, archive_1, series_1_1)


def test_try_get_dicom_archive_with_study_uid_some(setup: Setup):
    """A known study UID resolves to the matching archive row."""

    found = try_get_dicom_archive_with_study_uid(setup.db, '1.2.256.100000.1.2.3.456789')
    assert found is setup.dicom_archive


def test_try_get_dicom_archive_with_study_uid_none(setup: Setup):
    """An unknown study UID resolves to `None`."""

    found = try_get_dicom_archive_with_study_uid(setup.db, '1.2.256.999999.9.8.7654321')
    assert found is None


def test_delete_dicom_archive_file_series(setup: Setup):
    """Deleting an archive's files and series leaves no row behind for it."""

    delete_dicom_archive_file_series(setup.db, setup.dicom_archive)

    remaining_file = setup.db.execute(select(DbDicomArchiveFile)
        .where(DbDicomArchiveFile.archive_id == setup.dicom_archive.id)).first()
    assert remaining_file is None

    remaining_series = setup.db.execute(select(DbDicomArchiveSeries)
        .where(DbDicomArchiveSeries.archive_id == setup.dicom_archive.id)).first()
    assert remaining_series is None


def test_get_dicom_archive_series_with_file_info(setup: Setup):
    """The (UID, number, echo time, sequence) key resolves to the right series."""

    found = get_dicom_archive_series_with_file_info(
        setup.db,
        '1.3.12.2.11.11.11.999.0.0',
        1,
        100,
        'ep_b100',
    )
    assert found is setup.dicom_archive_series