Skip to content

Commit

Permalink
rewrite dicom archive
Browse files Browse the repository at this point in the history
  • Loading branch information
maximemulder committed Sep 24, 2024
1 parent 4823143 commit 2e7b3c4
Show file tree
Hide file tree
Showing 17 changed files with 1,430 additions and 2 deletions.
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,11 @@ select = ["E", "W"]
# `test` directory.
[tool.pyright]
include = [
"python/dicom_archive.py",
"python/dicom_summary.py",
"python/tests",
"python/lib/db",
"python/lib/dicom",
"python/lib/exception",
"python/lib/validate_subject_ids.py",
]
Expand Down
301 changes: 301 additions & 0 deletions python/dicom_archive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,301 @@
#!/usr/bin/env python

from dataclasses import dataclass
from typing import Any, cast
import gzip
import os
import shutil
import sys
import tarfile

from lib.db.connect import connect_to_db
import lib.dicom.dicom_database
import lib.dicom.dicom_log
import lib.dicom.summary_make
import lib.dicom.summary_write
import lib.dicom.text
import lib.exitcode
from lib.lorisgetopt import LorisGetOpt
from lib.db.model.dicom_archive import DbDicomArchive
from lib.db.query.dicom_archive import try_get_dicom_archive_with_study_uid


def print_error_exit(message: str, code: int):
    """
    Report a fatal error on the standard error stream and terminate the script.

    :param message: Description of the error, printed after an 'ERROR: ' prefix.
    :param code: Exit code the process terminates with.
    :raises SystemExit: Always, carrying `code`.
    """

    print('ERROR:', message, file=sys.stderr)
    raise SystemExit(code)


def print_warning(message: str):
    """
    Report a non-fatal problem on the standard error stream.

    :param message: Description of the problem, printed after a 'WARNING: ' prefix.
    """

    print('WARNING:', message, file=sys.stderr)


@dataclass
class Args:
profile: str | None
source: str
target: str
today: bool
year: bool
overwrite: bool
db_insert: bool
db_update: bool
verbose: bool

def __init__(self, options_dict: dict[str, Any]):
self.profile = options_dict['profile']['value']
self.source = options_dict['source']['value']
self.target = options_dict['target']['value']
self.today = options_dict['today']['value']
self.year = options_dict['year']['value']
self.overwrite = options_dict['overwrite']['value']
self.db_insert = options_dict['db-insert']['value']
self.db_update = options_dict['db-update']['value']
self.verbose = options_dict['verbose']['value']


def main():
    """
    Archive a DICOM directory and optionally register the archive in the database.

    The steps are, in order: parse the command line options, connect to the database,
    validate the arguments, extract the DICOM summary of the source directory, build a
    tar and a gzipped tar of the source directory, write the summary and log files,
    bundle the gzipped tar, summary and log into the final archive, remove the
    intermediate files, and insert or update the database entry if requested.

    The process exits with one of the `lib.exitcode` codes as soon as a check fails.
    """

    def check_create_file(path: str):
        """
        Check that `path` can be created: warn if it exists and '--overwrite' is set,
        exit with an error if it exists and '--overwrite' is not set.
        """

        if os.path.exists(path):
            if args.overwrite:
                print_warning(f'Overwriting \'{path}\'')
            else:
                print_error_exit(
                    (
                        f'File or directory \'{path}\' already exists. '
                        'Use option \'--overwrite\' to overwrite it.'
                    ),
                    lib.exitcode.TARGET_EXISTS_NO_CLOBBER,
                )

    usage = (
        "\n"

        "********************************************************************\n"
        " DICOM ARCHIVING SCRIPT\n"
        "********************************************************************\n"
        "The program reads a DICOM directory, processes it into a structured and "
        "compressed archive, and inserts it into or updates it in the LORIS database.\n\n"

        "usage : dicom_archive.py -p <profile> -s <source_dir> -t <target_dir> ...\n\n"

        "options: \n"
        "\t-p, --profile : Name of the python database config file in dicom-archive/.loris_mri\n"
        "\t-s, --source : Source directory containing the DICOM files to archive\n"
        "\t-t, --target : Directory in which to place the resulting DICOM archive\n"
        "\t --today : Use today's date as the scan date instead of the DICOM scan date\n"
        "\t --year : Create the archive in a year subdirectory (example: 2024/DCM_2024-08-27_FooBar.tar)\n"
        "\t --overwrite : Overwrite the DICOM archive file if it already exists\n"
        "\t --db-insert : Insert the created DICOM archive in the database (requires the archive\n"
        "\t to not be already inserted)\n"
        "\t --db-update : Update the DICOM archive in the database (requires the archive to\n"
        "\t already be inserted), generally used with --overwrite\n"
        "\t-v, --verbose : If set, be verbose\n\n"

        "required options are: \n"
        "\t--profile\n"
        "\t--source\n"
        "\t--target\n\n"
    )

    # NOTE: Some options do not have short options but LorisGetOpt does not support that, so we
    # repeat the long names.
    options_dict = {
        "profile": {
            "value": None, "required": True, "expect_arg": True, "short_opt": "p", "is_path": False
        },
        "source": {
            "value": None, "required": True, "expect_arg": True, "short_opt": "s", "is_path": True,
        },
        "target": {
            "value": None, "required": True, "expect_arg": True, "short_opt": "t", "is_path": True,
        },
        "today": {
            "value": False, "required": False, "expect_arg": False, "short_opt": "today", "is_path": False,
        },
        "year": {
            "value": False, "required": False, "expect_arg": False, "short_opt": "year", "is_path": False,
        },
        "overwrite": {
            "value": False, "required": False, "expect_arg": False, "short_opt": "overwrite", "is_path": False,
        },
        "db-insert": {
            "value": False, "required": False, "expect_arg": False, "short_opt": "db-insert", "is_path": False,
        },
        "db-update": {
            "value": False, "required": False, "expect_arg": False, "short_opt": "db-update", "is_path": False,
        },
        "verbose": {
            "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False
        },
        "help": {
            "value": False, "required": False, "expect_arg": False, "short_opt": "h", "is_path": False
        },
    }

    # Get the CLI arguments and connect to the database

    # `__file__[:-3]` strips the '.py' extension to get the script name.
    loris_getopt_obj = LorisGetOpt(usage, options_dict, os.path.basename(__file__[:-3]))
    args = Args(loris_getopt_obj.options_dict)

    db = connect_to_db(cast(Any, loris_getopt_obj.config_info).mysql)

    # Check arguments

    if args.db_insert and args.db_update:
        print_error_exit(
            'Arguments \'--db-insert\' and \'--db-update\' must not be set both at the same time.',
            lib.exitcode.INVALID_ARG,
        )

    if not os.path.isdir(args.source) or not os.access(args.source, os.R_OK):
        print_error_exit(
            'Argument \'--source\' must be a readable directory path.',
            lib.exitcode.INVALID_ARG,
        )

    if not os.path.isdir(args.target) or not os.access(args.target, os.W_OK):
        print_error_exit(
            'Argument \'--target\' must be a writable directory path.',
            lib.exitcode.INVALID_ARG,
        )

    # Check paths

    # Normalize the source path before taking its base name, otherwise a trailing slash
    # ('/data/study/') yields an empty name and '.' yields a name that is just a dot.
    base_name = os.path.basename(os.path.abspath(args.source))

    # Intermediate files, created in the target directory and removed once the final
    # archive has been written.
    tar_path = f'{args.target}/{base_name}.tar'
    zip_path = f'{args.target}/{base_name}.tar.gz'
    summary_path = f'{args.target}/{base_name}.meta'
    log_path = f'{args.target}/{base_name}.log'

    check_create_file(tar_path)
    check_create_file(zip_path)
    check_create_file(summary_path)
    check_create_file(log_path)

    print('Extracting DICOM information (may take a long time)')

    summary = lib.dicom.summary_make.make(args.source, args.verbose)

    print('Checking database presence')

    db_archive = try_get_dicom_archive_with_study_uid(db, summary.info.study_uid)

    if args.db_insert and db_archive is not None:
        print_error_exit(
            (
                f'Study \'{summary.info.study_uid}\' is already inserted in the database\n'
                'Previous archiving log:\n'
                f'{db_archive.create_info}'
            ),
            lib.exitcode.INSERT_FAILURE,
        )

    if args.db_update and db_archive is None:
        print_error_exit(
            f'No study \'{summary.info.study_uid}\' found in the database',
            lib.exitcode.UPDATE_FAILURE,
        )

    print('Copying into DICOM tar')

    with tarfile.open(tar_path, 'w') as tar:
        for file in os.listdir(args.source):
            tar.add(args.source + '/' + file)

    print('Calculating DICOM tar MD5 sum')

    tarball_md5_sum = lib.dicom.text.make_hash(tar_path, True)

    print('Zipping DICOM tar (may take a long time)')

    with open(tar_path, 'rb') as tar_file:
        # 6 is the default compression level of the tar command, Python's
        # default is 9, which is more powerful but also too slow.
        with gzip.open(zip_path, 'wb', compresslevel=6) as zip_file:
            shutil.copyfileobj(tar_file, zip_file)

    print('Calculating DICOM zip MD5 sum')

    zipball_md5_sum = lib.dicom.text.make_hash(zip_path, True)

    print('Getting DICOM scan date')

    # NOTE(review): within this function `args.today` only silences the warning below,
    # whether today's date is substituted for the scan date elsewhere is not visible
    # here -- confirm.
    if not args.today and summary.info.scan_date is None:
        print_warning((
            'No scan date was found in the DICOMs, '
            'consider using argument \'--today\' to use today\'s date as the scan date.'
        ))

    if args.year and summary.info.scan_date is None:
        print_warning((
            'Argument \'--year\' was provided but no scan date was found in the DICOMs, '
            'the argument will be ignored.'
        ))

    # Select the directory of the final archive: a year subdirectory of the target if
    # requested and possible, the target directory itself otherwise.
    if args.year and summary.info.scan_date is not None:
        dir_path = f'{args.target}/{summary.info.scan_date.year}'
        if not os.path.exists(dir_path):
            print(f'Creating directory \'{dir_path}\'')
            os.mkdir(dir_path)
        elif not os.path.isdir(dir_path) or not os.access(dir_path, os.W_OK):
            print_error_exit(
                f'Path \'{dir_path}\' exists but is not a writable directory.',
                lib.exitcode.CREATE_DIR_FAILURE,
            )
    else:
        dir_path = args.target

    if summary.info.scan_date is not None:
        scan_date_string = lib.dicom.text.write_date(summary.info.scan_date)
        archive_path = f'{dir_path}/DCM_{scan_date_string}_{base_name}.tar'
    else:
        archive_path = f'{dir_path}/DCM_{base_name}.tar'

    check_create_file(archive_path)

    log = lib.dicom.dicom_log.make(args.source, archive_path, tarball_md5_sum, zipball_md5_sum)

    if args.verbose:
        print('The archive will be created with the following arguments:')
        print(lib.dicom.dicom_log.write_to_string(log))

    print('Writing summary file')

    lib.dicom.summary_write.write_to_file(summary_path, summary)

    print('Writing log file')

    lib.dicom.dicom_log.write_to_file(log_path, log)

    print('Copying into DICOM archive')

    # The members are stored under their base names so that the archive does not
    # contain the path of the target directory.
    with tarfile.open(archive_path, 'w') as archive:
        archive.add(zip_path, os.path.basename(zip_path))
        archive.add(summary_path, os.path.basename(summary_path))
        archive.add(log_path, os.path.basename(log_path))

    print('Removing temporary files')

    os.remove(tar_path)
    os.remove(zip_path)
    os.remove(summary_path)
    os.remove(log_path)

    # This hash is the one of the final archive, not of the intermediate DICOM tar that
    # was hashed earlier.
    print('Calculating DICOM archive MD5 sum')

    log.archive_md5_sum = lib.dicom.text.make_hash(log.target_path, True)

    if args.db_insert:
        lib.dicom.dicom_database.insert(db, log, summary)

    if args.db_update:
        # Safe because we checked previously that the DICOM archive is not `None`
        db_archive = cast(DbDicomArchive, db_archive)
        lib.dicom.dicom_database.update(db, db_archive, log, summary)

    print('Success')


if __name__ == "__main__":
main()
50 changes: 50 additions & 0 deletions python/dicom_summary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env python

import argparse
from dataclasses import dataclass
import sys
import traceback

import lib.dicom.summary_make
import lib.dicom.summary_write
import lib.exitcode

# Command line interface of the script: one positional DICOM directory and an optional
# verbosity flag.
parser = argparse.ArgumentParser(description=(
    'Read a DICOM directory and print the DICOM summary of this directory '
    'in the console.'
))

parser.add_argument(
    'directory',
    help='The DICOM directory')

parser.add_argument(
    '--verbose',
    action='store_true',
    help='Set the script to be verbose')


@dataclass
class Args:
    """
    Typed container for the command line arguments of the script.
    """

    # Value of the positional 'directory' argument.
    directory: str
    # Whether the '--verbose' flag was given.
    verbose: bool


def main():
    """
    Print the summary of the DICOM directory given on the command line.

    The summary is written to the standard output. If it cannot be created, the error
    message and its traceback are written to the standard error stream and the script
    exits with the code `lib.exitcode.INVALID_DICOM`.
    """

    parsed_args = parser.parse_args()
    args = Args(parsed_args.directory, parsed_args.verbose)

    try:
        summary = lib.dicom.summary_make.make(args.directory, args.verbose)
    except Exception as e:
        # Top-level boundary of the script: any failure is reported with its traceback
        # and converted into an exit code.
        print(f'ERROR: Cannot create a summary for the directory \'{args.directory}\'.', file=sys.stderr)
        print('Exception message:', file=sys.stderr)
        print(e, file=sys.stderr)
        traceback.print_exc(file=sys.stderr)
        # Use `sys.exit` rather than the `exit` builtin: the latter is injected by the
        # `site` module for interactive use and is not guaranteed to exist in scripts.
        sys.exit(lib.exitcode.INVALID_DICOM)

    print(lib.dicom.summary_write.write_to_string(summary))


if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion python/lib/db/model/dicom_archive_series.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List, Optional
from sqlalchemy.orm import Mapped, mapped_column, relationship
from sqlalchemy import ForeignKey
from sqlalchemy.orm import Mapped, mapped_column, relationship
from lib.db.base import Base
import lib.db.model.dicom_archive as db_dicom_archive
import lib.db.model.dicom_archive_file as db_dicom_archive_file
Expand Down
2 changes: 1 addition & 1 deletion python/lib/db/model/mri_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class DbMriUpload(Base):
dicom_archive_id : Mapped[Optional[int]] \
= mapped_column('TarchiveID', ForeignKey('tarchive.TarchiveID'))
dicom_archive : Mapped[Optional['db_dicom_archive.DbDicomArchive']] \
= relationship('DicomArchive', back_populates='upload')
= relationship('DbDicomArchive', back_populates='upload')
session_id : Mapped[Optional[int]] = mapped_column('SessionID')
is_candidate_info_validated : Mapped[Optional[bool]] = mapped_column('IsCandidateInfoValidated')
is_dicom_archive_validated : Mapped[bool] = mapped_column('IsTarchiveValidated')
Expand Down
Loading

0 comments on commit 2e7b3c4

Please sign in to comment.