Skip to content

Commit

Permalink
[PRMP-1069] update nhs number validation for metadata ingestion
Browse files: browse the repository at this point in the history
  • Loading branch information
abbas-khan10 authored Nov 20, 2024
1 parent 4533487 commit 1427b0c
Show file tree
Hide file tree
Showing 8 changed files with 49 additions and 30 deletions.
12 changes: 4 additions & 8 deletions lambdas/models/staging_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

METADATA_FILENAME = "metadata.csv"
NHS_NUMBER_FIELD_NAME = "NHS-NO"
NHS_NUMBER_PLACEHOLDER = "0000000000"


def to_upper_case_with_hyphen(field_name: str) -> str:
Expand All @@ -18,13 +19,9 @@ class MetadataFile(BaseModel):

file_path: str = Field(alias="FILEPATH")
page_count: str = Field(alias="PAGE COUNT")

# Temporary field that exists only so nhs_number can be retrieved during validation.
# To keep a single source of truth, read the NHS number from StagingMetadata instead.
nhs_number: Optional[str] = Field(
alias=NHS_NUMBER_FIELD_NAME, exclude=True, default=None
)

gp_practice_code: str
section: str
sub_section: Optional[str]
Expand All @@ -51,15 +48,14 @@ def ensure_gp_practice_code_non_empty(
class StagingMetadata(BaseModel):
model_config = ConfigDict(populate_by_name=True)

nhs_number: str = Field(alias=NHS_NUMBER_FIELD_NAME)
nhs_number: str = Field(default=NHS_NUMBER_PLACEHOLDER, alias=NHS_NUMBER_FIELD_NAME)
files: list[MetadataFile]

retries: int = 0

@field_validator("nhs_number")
@classmethod
def validate_nhs_number(cls, nhs_number: str) -> str:
if nhs_number.isdigit() and len(nhs_number) == 10:
if nhs_number and nhs_number.isdigit():
return nhs_number

raise ValueError("NHS number must be a 10 digit number")
return NHS_NUMBER_PLACEHOLDER
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"NHS-NO":"0000000000","files":[{"FILEPATH":"1of1_Lloyd_George_Record_[Jane Smith]_[1234567892]_[25-12-2019].txt","PAGE COUNT":"","GP-PRACTICE-CODE":"Y12345","SECTION":"LG","SUB-SECTION":"","SCAN-DATE":"04/09/2022","SCAN-ID":"NEC","USER-ID":"NEC","UPLOAD":"04/10/2023"}],"retries":0}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"NHS-NO":"123456789","files":[{"FILEPATH":"1of1_Lloyd_George_Record_[Joe Bloggs_invalid]_[123456789]_[25-12-2019].txt","PAGE COUNT":"","GP-PRACTICE-CODE":"Y12345","SECTION":"LG","SUB-SECTION":"","SCAN-DATE":"04/09/2022","SCAN-ID":"NEC","USER-ID":"NEC","UPLOAD":"04/10/2023"}],"retries":0}

This file was deleted.

3 changes: 2 additions & 1 deletion lambdas/tests/unit/helpers/data/bulk_upload/metadata.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FILEPATH,PAGE COUNT,GP-PRACTICE-CODE,NHS-NO,SECTION,SUB-SECTION,SCAN-DATE,SCAN-ID,USER-ID,UPLOAD
/1234567890/1of2_Lloyd_George_Record_[Joe Bloggs]_[1234567890]_[25-12-2019].pdf,,Y12345,1234567890,LG,,03/09/2022,NEC,NEC,04/10/2023
/1234567890/2of2_Lloyd_George_Record_[Joe Bloggs]_[1234567890]_[25-12-2019].pdf,,Y12345,1234567890,LG,,03/09/2022,NEC,NEC,04/10/2023
1of1_Lloyd_George_Record_[Joe Bloggs_invalid]_[1234567891]_[25-12-2019].txt,,Y12345,1234567891,LG,,04/09/2022,NEC,NEC,04/10/2023
1of1_Lloyd_George_Record_[Joe Bloggs_invalid]_[123456789]_[25-12-2019].txt,,Y12345,123456789,LG,,04/09/2022,NEC,NEC,04/10/2023
1of1_Lloyd_George_Record_[Jane Smith]_[1234567892]_[25-12-2019].txt,,Y12345,,LG,,04/09/2022,NEC,NEC,04/10/2023
36 changes: 25 additions & 11 deletions lambdas/tests/unit/helpers/data/bulk_upload/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,11 @@

patient_2_file_1 = sample_metadata_model.model_copy(
update={
"file_path": "1of1_Lloyd_George_Record_[Joe Bloggs_invalid]_[1234567891]_[25-12-2019].txt",
"file_path": "1of1_Lloyd_George_Record_[Joe Bloggs_invalid]_[123456789]_[25-12-2019].txt",
"scan_date": "04/09/2022",
}
)
patient_2 = StagingMetadata(
nhs_number="1234567891", files=[patient_2_file_1], retries=0
)
patient_2 = StagingMetadata(nhs_number="123456789", files=[patient_2_file_1], retries=0)
MOCK_METADATA = [patient_1, patient_2]


Expand All @@ -45,8 +43,8 @@
patient_1_file_2_with_temp_nhs_number = patient_1_file_2.model_copy(
update={"nhs_number": "1234567890"}
)
patient_2_file_1_with_temp_nhs_number = patient_2_file_1.model_copy(
update={"nhs_number": "1234567891"}
patient_2_file_1_with_short_nhs_number = patient_2_file_1.model_copy(
update={"nhs_number": "123456789"}
)
patient_1_with_temp_nhs_number = StagingMetadata(
nhs_number="1234567890",
Expand All @@ -55,12 +53,25 @@
patient_1_file_2_with_temp_nhs_number,
],
)
patient_2_with_temp_nhs_number = StagingMetadata(
nhs_number="1234567891", files=[patient_2_file_1_with_temp_nhs_number]
patient_2_with_short_nhs_number = StagingMetadata(
nhs_number="123456789", files=[patient_2_file_1_with_short_nhs_number]
)

patient_3_with_missing_nhs_number_metadata_file = sample_metadata_model.model_copy(
update={
"nhs_number": "",
"file_path": "1of1_Lloyd_George_Record_[Jane Smith]_[1234567892]_[25-12-2019].txt",
"scan_date": "04/09/2022",
}
)
patient_3_with_missing_nhs_number = StagingMetadata(
nhs_number="0000000000", files=[patient_3_with_missing_nhs_number_metadata_file]
)

EXPECTED_PARSED_METADATA = [
patient_1_with_temp_nhs_number,
patient_2_with_temp_nhs_number,
patient_2_with_short_nhs_number,
patient_3_with_missing_nhs_number,
]


Expand All @@ -74,8 +85,11 @@ def readfile(filename: str) -> str:
EXPECTED_SQS_MSG_FOR_PATIENT_1234567890 = readfile(
"expect_sqs_msg_for_patient_1234567890.json"
)
EXPECTED_SQS_MSG_FOR_PATIENT_1234567891 = readfile(
"expect_sqs_msg_for_patient_1234567891.json"
EXPECTED_SQS_MSG_FOR_PATIENT_123456789 = readfile(
"expect_sqs_msg_for_patient_123456789.json"
)
EXPECTED_SQS_MSG_FOR_PATIENT_0000000000 = readfile(
"expect_sqs_msg_for_patient_0000000000.json"
)


Expand Down
6 changes: 3 additions & 3 deletions lambdas/tests/unit/models/test_staging_metadata_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from models.staging_metadata import StagingMetadata
from tests.unit.helpers.data.bulk_upload.test_data import (
EXPECTED_SQS_MSG_FOR_PATIENT_123456789,
EXPECTED_SQS_MSG_FOR_PATIENT_1234567890,
EXPECTED_SQS_MSG_FOR_PATIENT_1234567891,
patient_1,
patient_2,
)
Expand All @@ -16,7 +16,7 @@ def test_serialise_staging_data_to_json():
)
assert (
patient_2.model_dump_json(by_alias=True)
== EXPECTED_SQS_MSG_FOR_PATIENT_1234567891
== EXPECTED_SQS_MSG_FOR_PATIENT_123456789
)


Expand All @@ -29,7 +29,7 @@ def test_deserialise_json_to_staging_data():
)
assert (
StagingMetadata.model_validate(
json.loads(EXPECTED_SQS_MSG_FOR_PATIENT_1234567891)
json.loads(EXPECTED_SQS_MSG_FOR_PATIENT_123456789)
)
== patient_2
)
19 changes: 13 additions & 6 deletions lambdas/tests/unit/services/test_bulk_upload_metadata_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
from tests.unit.conftest import MOCK_LG_METADATA_SQS_QUEUE, MOCK_STAGING_STORE_BUCKET
from tests.unit.helpers.data.bulk_upload.test_data import (
EXPECTED_PARSED_METADATA,
EXPECTED_SQS_MSG_FOR_PATIENT_0000000000,
EXPECTED_SQS_MSG_FOR_PATIENT_123456789,
EXPECTED_SQS_MSG_FOR_PATIENT_1234567890,
EXPECTED_SQS_MSG_FOR_PATIENT_1234567891,
MOCK_METADATA,
)
from utils.exceptions import BulkUploadMetadataException
Expand Down Expand Up @@ -50,14 +51,20 @@ def test_process_metadata_send_metadata_to_sqs_queue(
call(
group_id="bulk_upload_123412342",
queue_url=MOCK_LG_METADATA_SQS_QUEUE,
message_body=EXPECTED_SQS_MSG_FOR_PATIENT_1234567891,
nhs_number="1234567891",
message_body=EXPECTED_SQS_MSG_FOR_PATIENT_123456789,
nhs_number="123456789",
),
call(
group_id="bulk_upload_123412342",
queue_url=MOCK_LG_METADATA_SQS_QUEUE,
message_body=EXPECTED_SQS_MSG_FOR_PATIENT_0000000000,
nhs_number="0000000000",
),
]

metadata_service.process_metadata(metadata_filename)

assert mock_sqs_service.send_message_with_nhs_number_attr_fifo.call_count == 2
assert mock_sqs_service.send_message_with_nhs_number_attr_fifo.call_count == 3
mock_sqs_service.send_message_with_nhs_number_attr_fifo.assert_has_calls(
expected_calls
)
Expand Down Expand Up @@ -219,8 +226,8 @@ def test_send_metadata_to_sqs(set_env, mocker, mock_sqs_service, metadata_servic
),
call(
queue_url=MOCK_LG_METADATA_SQS_QUEUE,
message_body=EXPECTED_SQS_MSG_FOR_PATIENT_1234567891,
nhs_number="1234567891",
message_body=EXPECTED_SQS_MSG_FOR_PATIENT_123456789,
nhs_number="123456789",
group_id="bulk_upload_123412342",
),
]
Expand Down

0 comments on commit 1427b0c

Please sign in to comment.