Set lodgement date of OCRs created via bulk importer to the current datetime. Add support for validating associated files in schemas. Add some db performance tweaks for OCR bulk import related models.
oakdbca committed Oct 16, 2024
1 parent 69c8827 commit b506d87
Showing 1 changed file with 79 additions and 17 deletions.
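
The headline change stamps occurrence reports created by the bulk importer with a lodgement date at creation time. A minimal sketch of that behaviour, assuming a simplified helper around the real OccurrenceReport model (the field names come from the diff below; the helper itself is illustrative, not the importer's actual code path):

from django.utils import timezone

from boranga.components.occurrence.models import OccurrenceReport


def get_or_build_report(row, schema, email_user):
    # Reuse an already-migrated report if one exists for this source id.
    existing = OccurrenceReport.objects.filter(migrated_from_id=row[0]).first()
    if existing is not None:
        return existing

    # Brand-new reports created by the bulk importer now receive the current
    # datetime as their lodgement date instead of being left unset.
    return OccurrenceReport(
        migrated_from_id=row[0],
        group_type_id=schema.group_type_id,
        submitter=email_user,
        lodgement_date=timezone.now(),
    )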
96 changes: 79 additions & 17 deletions boranga/components/occurrence/models.py
@@ -34,14 +34,15 @@
from django.db.models import CharField, Count, Func, ManyToManyField, Max, Q
from django.db.models.functions import Cast, Length
from django.utils import timezone
from django.utils.functional import cached_property
from ledger_api_client.ledger_models import EmailUserRO as EmailUser
from ledger_api_client.managed_models import SystemGroup
from multiselectfield import MultiSelectField
from openpyxl.styles import NamedStyle
from openpyxl.styles.fonts import Font
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.datavalidation import DataValidation
from ordered_model.models import OrderedModel
from ordered_model.models import OrderedModel, OrderedModelManager
from taggit.managers import TaggableManager

from boranga import exceptions
@@ -5758,6 +5759,7 @@ def process_row(self, ocr_migrated_from_ids, index, headers, row, errors):
current_model_instance.import_hash = row_hash
current_model_instance.group_type_id = self.schema.group_type_id
current_model_instance.submitter = self.email_user
current_model_instance.lodgement_date = timezone.now()
else:
current_model_instance = OccurrenceReport.objects.get(
migrated_from_id=row[0]
@@ -5801,6 +5803,8 @@ def process_row(self, ocr_migrated_from_ids, index, headers, row, errors):
return
else:
if not occ_migrated_from_id:
if not model_data.get("group_type"):
model_data["group_type"] = self.schema.group_type
current_model_instance = Occurrence(**model_data)
else:
if Occurrence.objects.filter(
@@ -5876,15 +5880,14 @@ def process_row(self, ocr_migrated_from_ids, index, headers, row, errors):
if current_model_name not in [
OccurrenceReport._meta.model_name,
Occurrence._meta.model_name,
SubmitterInformation._meta.model_name,
]:
# Relate this model to its parent instance

related_to_parent = False
# Look through all the model instances that have already been saved
for potential_parent_model_key in [m for m in model_instances]:

# Look through all the models being imported except for the current model
for potential_parent_model_key in [
m for m in models if m != current_model_name
]:
# Check if this model has a relationship with the current model
potential_parent_instance = model_instances[
potential_parent_model_key
@@ -6415,10 +6418,34 @@ def validate(self, request_user_id: int):
None,
None,
)
sample_associated_file = InMemoryUploadedFile(
BytesIO(b"I am a test file"),
"file",
"sample_file.txt",
"text/plain",
0,
None,
)
# Create a .zip file to house the sample associated file
zip_buffer = BytesIO()
with zipfile.ZipFile(zip_buffer, "w") as zip_file:
zip_file.writestr(
sample_associated_file.name, sample_associated_file.read()
)
zip_file_name = f"sample_{self.group_type.name}_{self.version}.zip"
zip_file = InMemoryUploadedFile(
zip_buffer,
"file",
zip_file_name,
"application/zip",
0,
None,
)

import_task = OccurrenceReportBulkImportTask(
schema=self,
_file=import_file,
_associated_files_zip=zip_file,
rows=1,
email_user=request_user_id,
)
@@ -6544,7 +6571,13 @@ def copy(self, request):
return new_schema


class OccurrenceReportBulkImportSchemaColumnManager(OrderedModelManager):
def get_queryset(self):
return super().get_queryset().prefetch_related("lookup_filters")


class OccurrenceReportBulkImportSchemaColumn(OrderedModel):
objects = OccurrenceReportBulkImportSchemaColumnManager()
schema = models.ForeignKey(
OccurrenceReportBulkImportSchema,
related_name="columns",
@@ -6667,7 +6700,7 @@ def choices(self):

return get_choices_for_field(model_class, self.field)

@property
@cached_property
def related_model(self):
if not self.django_import_content_type or not self.django_import_field_name:
return None
@@ -6680,7 +6713,7 @@ def related_model(self):

return field.related_model

@property
@cached_property
def display_field(self):
related_model = self.related_model

@@ -6694,22 +6727,24 @@ def display_field(self):

return display_field

@property
@cached_property
def related_model_qs(self):
display_field = self.display_field

if not display_field:
return None

filter_dict = {f"{display_field}__isnull": False}
related_model_qs = self.related_model.objects.filter(**filter_dict)

if issubclass(self.related_model, ArchivableModel):
related_model_qs = self.related_model.objects.exclude(archived=True)

return related_model_qs
if hasattr(self.related_model, "group_type"):
related_model_qs = related_model_qs.only(display_field, "group_type")
else:
related_model_qs = related_model_qs.only(display_field)

@property
return related_model_qs.order_by(display_field)

@cached_property
def filtered_related_model_qs(self):
if not self.related_model_qs:
return None
@@ -6751,7 +6786,7 @@ def filtered_related_model_qs(self):

return related_model_qs

@property
@cached_property
def foreign_key_count(self):
if not self.related_model_qs:
return 0
@@ -6761,7 +6796,7 @@ def foreign_key_count(self):
# No longer being shown
return self.related_model_qs.count()

@property
@cached_property
def requires_lookup_field(self):
if not self.django_import_content_type or not self.django_import_field_name:
return False
@@ -6777,7 +6812,7 @@ def requires_lookup_field(self):
self.foreign_key_count > settings.OCR_BULK_IMPORT_LOOKUP_TABLE_RECORD_LIMIT
)

@property
@cached_property
def filtered_foreign_key_count(self):
if not self.filtered_related_model_qs:
return 0
@@ -6855,6 +6890,28 @@ def get_sample_value(self, errors, species_or_community_identifier=None):

return random_value

if isinstance(field, models.ManyToManyField):
related_model_qs = self.filtered_related_model_qs

if not related_model_qs.exists():
error_message = f"No records found for many to many field {field.related_model._meta.model_name}"
errors.append(
{
"error_type": "no_records",
"error_message": error_message,
}
)

display_field = self.display_field

random_values = list(
related_model_qs.order_by("?")
.values_list(display_field, flat=True)
.distinct()[: random.randint(1, 3)]
)

return ",".join(random_values)

if isinstance(field, MultiSelectField):
model_class = self.django_import_content_type.model_class()
# Unfortunately have to have an actual model instance to get the choices
@@ -6980,6 +7037,9 @@ def get_sample_value(self, errors, species_or_community_identifier=None):
}
)

if isinstance(field, models.FileField):
return "sample_file.txt"

raise ValueError(
f"Not able to generate sample data for field {field} of type {type(field)}"
)
@@ -7448,7 +7508,9 @@ def validate(self, task, cell_value, mode, index, headers, row, errors):
return cell_value, errors_added

# Make the datetime object timezone aware
cell_value.replace(tzinfo=zoneinfo.ZoneInfo(settings.TIME_ZONE))
cell_value = cell_value.replace(
tzinfo=zoneinfo.ZoneInfo(settings.TIME_ZONE)
)

return cell_value, errors_added

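The validate() hunk above now also fabricates an in-memory zip holding a sample associated file, so the dry-run import task can exercise file columns. A standalone sketch of that pattern, assuming only Django and the standard library (the function name and the size handling are illustrative):

import zipfile
from io import BytesIO

from django.core.files.uploadedfile import InMemoryUploadedFile


def build_sample_zip(group_type_name, version):
    # Write a single placeholder text file into an in-memory zip archive,
    # mirroring how the schema builds an _associated_files_zip for its
    # dry-run OccurrenceReportBulkImportTask.
    zip_buffer = BytesIO()
    with zipfile.ZipFile(zip_buffer, "w") as zip_file:
        zip_file.writestr("sample_file.txt", b"I am a test file")
    zip_buffer.seek(0)

    return InMemoryUploadedFile(
        zip_buffer,
        "file",
        f"sample_{group_type_name}_{version}.zip",
        "application/zip",
        zip_buffer.getbuffer().nbytes,
        None,
    )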
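
The remaining hunks are the db performance tweaks from the commit message: a manager that prefetches lookup_filters, several @property decorators swapped for @cached_property, and .only()/order_by() applied to the lookup querysets. A minimal sketch of the general pattern on a hypothetical model pair (Widget* and LookupFilter here are stand-ins, not the real boranga models):

from django.db import models
from django.utils.functional import cached_property


class LookupFilter(models.Model):
    name = models.CharField(max_length=255)

    class Meta:
        app_label = "example"  # stand-in app label for a self-contained sketch


class WidgetColumnManager(models.Manager):
    def get_queryset(self):
        # Prefetch the related rows every caller iterates over, so listing
        # columns does not issue one extra query per column.
        return super().get_queryset().prefetch_related("lookup_filters")


class WidgetColumn(models.Model):
    name = models.CharField(max_length=255)
    lookup_filters = models.ManyToManyField(LookupFilter, blank=True)

    objects = WidgetColumnManager()

    class Meta:
        app_label = "example"

    @cached_property
    def related_model_qs(self):
        # cached_property evaluates this once per instance rather than on
        # every access; .only() trims the SELECT list to the fields used.
        return (
            LookupFilter.objects.filter(name__isnull=False)
            .only("name")
            .order_by("name")
        )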