Skip to content

Commit

Permalink
Make it clear when study already exported (#585)
Browse files Browse the repository at this point in the history
* Make it clear when study already exported

* Document slightly unusual test setup choices
  • Loading branch information
stefpiatek authored Jan 14, 2025
1 parent b3921b6 commit dca65ca
Show file tree
Hide file tree
Showing 3 changed files with 151 additions and 54 deletions.
39 changes: 22 additions & 17 deletions pixl_dcmd/src/pixl_dcmd/_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
from sqlalchemy.orm.session import Session

from core.db.models import Image, Extract
from sqlalchemy import URL, create_engine, exists
from sqlalchemy import ColumnElement, URL, create_engine, exists
from sqlalchemy.orm import sessionmaker, exc

from core.exceptions import PixlDiscardError
from pixl_dcmd.dicom_helpers import StudyInfo

url = URL.create(
Expand Down Expand Up @@ -126,29 +127,33 @@ def get_unexported_image(
Get an existing, non-exported (for this project) image record from the database
identified by the study UID. If no result is found, retry with querying on
MRN + accession number. If this fails as well, raise a NoResultFound.
If study has already been exported, raise a PixlDiscardError.
"""
try:
existing_image: Image = (
pixl_session.query(Image)
.join(Extract)
.filter(
Extract.slug == project_slug,
Image.study_uid == study_info.study_uid,
Image.exported_at == None, # noqa: E711
)
.one()
existing_image = _query_and_raise_if_exported(
pixl_session,
[Extract.slug == project_slug, Image.study_uid == study_info.study_uid],
)
# If no image is found by study UID, try MRN + accession number
except exc.NoResultFound:
existing_image = (
pixl_session.query(Image)
.join(Extract)
.filter(
existing_image = _query_and_raise_if_exported(
pixl_session,
[
Extract.slug == project_slug,
Image.mrn == study_info.mrn,
Image.accession_number == study_info.accession_number,
Image.exported_at == None, # noqa: E711
)
.one()
],
)
return existing_image


def _query_and_raise_if_exported(
pixl_session: Session, clause_list: list[ColumnElement[bool]]
) -> Image:
existing_image: Image = (
pixl_session.query(Image).join(Extract).filter(*clause_list).one()
)
if existing_image.exported_at is not None:
msg = "Study already exported"
raise PixlDiscardError(msg)
return existing_image
162 changes: 127 additions & 35 deletions pixl_dcmd/tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,13 @@
from __future__ import annotations

import datetime
from dataclasses import dataclass

import pytest
import sqlalchemy

from core.db.models import Extract, Image
from core.exceptions import PixlDiscardError
from pixl_dcmd._database import (
get_unexported_image,
get_uniq_pseudo_study_uid_and_update_db,
Expand All @@ -26,13 +30,81 @@
from sqlalchemy.orm import Session

STUDY_DATE = datetime.date.fromisoformat("2023-01-01")


@dataclass
class StudyData:
"""Database setup of study data."""

mrn: str
accession_number: str
study_uid: str | None
study_date: datetime.date | sqlalchemy.Date = STUDY_DATE
pseudo_patient_id: str | None = None
pseudo_study_uid: str | None = None
exported_at: datetime.datetime | sqlalchemy.DateTime | None = None


@dataclass
class TestData:
"""All test data, separated so that the input values can be overriden and different to db data."""

db: StudyData
input: StudyInfo


def _create_test_data(study_data: StudyData) -> TestData:
return TestData(
study_data,
StudyInfo(
mrn=study_data.mrn,
accession_number=study_data.accession_number,
study_uid=study_data.study_uid,
),
)


TEST_PROJECT_SLUG = "test-extract-uclh-omop-cdm"
TEST_STUDY_INFO = StudyInfo(
mrn="123456", accession_number="abcde", study_uid="1.2.3.4.5"
UNPROCESSED_STUDY = _create_test_data(
StudyData(mrn="123456", accession_number="abcde", study_uid="1.2.3.4.5")
)
TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID = StudyInfo(
mrn="234567", accession_number="bcdef", study_uid="2.3.4.5.6"
PSEUDO_IDS_STUDY = _create_test_data(
StudyData(
mrn="234567",
accession_number="bcdef",
study_uid="2.3.4.5.6",
pseudo_patient_id="garbled",
pseudo_study_uid="0.0.0.0.0.0",
)
)

EXPORTED_STUDY = _create_test_data(
StudyData(
mrn="exported_mrn",
accession_number="exported_accession",
study_uid="6.6.6",
pseudo_study_uid="1.1.1",
exported_at=datetime.datetime.fromisoformat("2024-01-01"),
)
)

# For duplicates, the database should have no UIDs
DUPLICATE_ACCESSION_BUT_HAS_UID = _create_test_data(
StudyData(
mrn="duplicate_mrn",
accession_number="duplicate_accession",
study_uid="7.7.7",
)
)

DUPLICATE_ACCESSION_NO_UID = _create_test_data(
StudyData(
mrn="duplicate_mrn",
accession_number="duplicate_accession",
study_uid="8.8.8",
)
)
DUPLICATE_ACCESSION_NO_UID.db.study_uid = None


@pytest.fixture()
Expand All @@ -42,46 +114,45 @@ def rows_for_database_testing(db_session) -> Session:
pytest_pixl.dicom.generate_dicom_dataset.
"""
extract = Extract(slug=TEST_PROJECT_SLUG)

existing_image = Image(
mrn=TEST_STUDY_INFO.mrn,
accession_number=TEST_STUDY_INFO.accession_number,
study_uid=TEST_STUDY_INFO.study_uid,
study_date=STUDY_DATE,
extract=extract,
)

existing_image_with_pseudo_study_uid = Image(
mrn=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn,
accession_number=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.accession_number,
study_uid=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.study_uid,
study_date=STUDY_DATE,
extract=extract,
# This should be a valid VR UI value!
# https://dicom.nema.org/medical/dicom/current/output/html/part05.html#table_6.2-1
pseudo_study_uid="0.0.0.0.0.0",
pseudo_patient_id=TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn,
)

with db_session:
db_session.add_all(
[extract, existing_image, existing_image_with_pseudo_study_uid]
[
extract,
_image_from_study_data(UNPROCESSED_STUDY.db, extract),
_image_from_study_data(PSEUDO_IDS_STUDY.db, extract),
_image_from_study_data(EXPORTED_STUDY.db, extract),
_image_from_study_data(DUPLICATE_ACCESSION_BUT_HAS_UID.db, extract),
_image_from_study_data(DUPLICATE_ACCESSION_NO_UID.db, extract),
]
)
db_session.commit()

return db_session


def _image_from_study_data(study_data: StudyData, extract: Extract) -> Image:
return Image(
mrn=study_data.mrn,
accession_number=study_data.accession_number,
study_uid=study_data.study_uid,
study_date=study_data.study_date,
pseudo_study_uid=study_data.pseudo_study_uid,
pseudo_patient_id=study_data.pseudo_patient_id,
exported_at=study_data.exported_at,
extract=extract,
)


def test_get_uniq_pseudo_study_uid_and_update_db(rows_for_database_testing, db_session):
"""
GIVEN an existing image that already has a pseudo_study_uid
WHEN we query the database for that image
THEN the function should return the existing pseudo_study_uid.
"""
pseudo_study_uid = get_uniq_pseudo_study_uid_and_update_db(
TEST_PROJECT_SLUG, TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID
TEST_PROJECT_SLUG, PSEUDO_IDS_STUDY.input
)
assert pseudo_study_uid == "0.0.0.0.0.0"
assert pseudo_study_uid == PSEUDO_IDS_STUDY.db.pseudo_study_uid


def test_get_pseudo_patient_id_and_update_db(rows_for_database_testing, db_session):
Expand All @@ -92,13 +163,11 @@ def test_get_pseudo_patient_id_and_update_db(rows_for_database_testing, db_sessi
"""
get_pseudo_patient_id_and_update_db(
TEST_PROJECT_SLUG,
TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID,
TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn,
)
result = get_unexported_image(
TEST_PROJECT_SLUG, TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID, db_session
PSEUDO_IDS_STUDY.input,
PSEUDO_IDS_STUDY.input.mrn,
)
assert result.pseudo_patient_id == TEST_STUDY_INFO_WITH_PSEUDO_PATIENT_ID.mrn
result = get_unexported_image(TEST_PROJECT_SLUG, PSEUDO_IDS_STUDY.input, db_session)
assert result.pseudo_patient_id == PSEUDO_IDS_STUDY.db.pseudo_patient_id


def test_get_unexported_image_fallback(rows_for_database_testing, db_session):
Expand All @@ -113,4 +182,27 @@ def test_get_unexported_image_fallback(rows_for_database_testing, db_session):
study_uid="nope",
)
result = get_unexported_image(TEST_PROJECT_SLUG, wrong_uid_info, db_session)
assert result.study_uid == TEST_STUDY_INFO.study_uid
assert result.study_uid == UNPROCESSED_STUDY.input.study_uid


def test_exported_image_throws(rows_for_database_testing, db_session):
"""
GIVEN a database entry with the image exported
WHEN we query for the image
THEN a NoRowFound exception should be thrown
"""
with pytest.raises(PixlDiscardError) as exception:
get_unexported_image(TEST_PROJECT_SLUG, EXPORTED_STUDY.input, db_session)
assert str(exception.value) == "Study already exported"


def test_duplicate_image_throws(rows_for_database_testing, db_session):
"""
GIVEN the database has two entries for the same accession number, one with a study uid
WHEN the input without the study uid is processed
THEN a MultipleResultsFound exception should be thrown
"""
with pytest.raises(sqlalchemy.exc.MultipleResultsFound):
get_unexported_image(
TEST_PROJECT_SLUG, DUPLICATE_ACCESSION_NO_UID.input, db_session
)
4 changes: 2 additions & 2 deletions pixl_dcmd/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@
import numpy as np
import pydicom
import pytest
import sqlalchemy
from pytest_check import check
from core.db.models import Image
from core.dicom_tags import (
PrivateDicomTag,
add_private_tag,
create_private_tag,
)
from core.exceptions import PixlDiscardError
from core.project_config import load_project_config, load_tag_operations
from core.project_config.pixl_config_model import load_config_and_validate
from decouple import config
Expand Down Expand Up @@ -251,7 +251,7 @@ def test_image_already_exported_throws(test_project_config, exported_dicom_datas
WHEN the dicom tag scheme is applied
THEN an exception will be thrown as
"""
with pytest.raises(sqlalchemy.exc.NoResultFound):
with pytest.raises(PixlDiscardError):
anonymise_dicom_and_update_db(
exported_dicom_dataset,
config=test_project_config,
Expand Down

0 comments on commit dca65ca

Please sign in to comment.