Skip to content

Commit

Permalink
Merge pull request #78 from swisstopo/LGVISIUM-67-Make-the-naming-of-…
Browse files Browse the repository at this point in the history
…dates-more-consistent-across-the-code

Close #LGVISIUM-67: Addressed the naming issues with the date
  • Loading branch information
dcleres authored Sep 11, 2024
2 parents b7d34ce + 7595cf1 commit 8c8cd3e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 34 deletions.
61 changes: 34 additions & 27 deletions src/stratigraphy/groundwater/groundwater_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import abc
import logging
from dataclasses import dataclass
from datetime import date, datetime
from datetime import date as dt
from datetime import datetime

import fitz
import numpy as np
Expand All @@ -26,7 +27,7 @@ class GroundwaterInformation(metaclass=abc.ABCMeta):
"""Abstract class for Groundwater Information."""

depth: float # Depth of the groundwater relative to the surface
measurement_date: date | None = (
date: dt | None = (
None # Date of the groundwater measurement, if several dates
# are present, the date of the document the last measurement is taken
)
Expand All @@ -48,24 +49,34 @@ def __str__(self) -> str:
"""
return (
f"GroundwaterInformation("
f"measurement_date={self.format_measurement_date()}, "
f"date={self.format_date()}, "
f"depth={self.depth}, "
f"elevation={self.elevation})"
)

@staticmethod
def from_json_values(depth: float | None, measurement_date: str | None, elevation: float | None):
if measurement_date is not None and measurement_date != "":
# convert to datetime object
measurement_date = datetime.strptime(measurement_date, DATE_FORMAT).date()
else:
measurement_date = None
def from_json_values(depth: float | None, date: str | None, elevation: float | None):
"""Converts the object from a dictionary.
return GroundwaterInformation(depth=depth, measurement_date=measurement_date, elevation=elevation)
Args:
depth (float | None): The depth of the groundwater.
date (str | None): The measurement date of the groundwater.
elevation (float | None): The elevation of the groundwater.
def format_measurement_date(self) -> str | None:
if self.measurement_date is not None:
return self.measurement_date.strftime(DATE_FORMAT)
Returns:
GroundwaterInformation: The object created from the dictionary.
"""
date = datetime.strptime(date, DATE_FORMAT).date() if date is not None and date != "" else None
return GroundwaterInformation(depth=depth, date=date, elevation=elevation)

def format_date(self) -> str | None:
"""Formats the date of the groundwater measurement.
Returns:
str | None: The formatted date of the groundwater measurement.
"""
if self.date is not None:
return self.date.strftime(DATE_FORMAT)
else:
return None

Expand All @@ -91,21 +102,19 @@ def to_dict(self) -> dict:
dict: The object as a dictionary.
"""
return {
"measurement_date": self.groundwater.format_measurement_date(),
"date": self.groundwater.format_date(),
"depth": self.groundwater.depth,
"elevation": self.groundwater.elevation,
"page": self.page if self.page else None,
"rect": [self.rect.x0, self.rect.y0, self.rect.x1, self.rect.y1] if self.rect else None,
}

@staticmethod
def from_json_values(
measurement_date: str | None, depth: float | None, elevation: float | None, page: int, rect: list[float]
):
def from_json_values(date: str | None, depth: float | None, elevation: float | None, page: int, rect: list[float]):
"""Converts the object from a dictionary.
Args:
measurement_date (str | None): The measurement date of the groundwater.
date (str | None): The measurement date of the groundwater.
depth (float | None): The depth of the groundwater.
elevation (float | None): The elevation of the groundwater.
page (int): The page number of the PDF document.
Expand All @@ -115,9 +124,7 @@ def from_json_values(
GroundwaterInformationOnPage: The object created from the dictionary.
"""
return GroundwaterInformationOnPage(
groundwater=GroundwaterInformation.from_json_values(
depth=depth, measurement_date=measurement_date, elevation=elevation
),
groundwater=GroundwaterInformation.from_json_values(depth=depth, date=date, elevation=elevation),
page=page,
rect=fitz.Rect(rect),
)
Expand Down Expand Up @@ -178,7 +185,7 @@ def get_groundwater_info_from_lines(self, lines: list[TextLine], page: int) -> G
Returns:
GroundwaterInformationOnPage: the extracted groundwater information
"""
datetime_date: date | None = None
date: dt | None = None
depth: float | None = None
elevation: float | None = None

Expand All @@ -193,7 +200,7 @@ def get_groundwater_info_from_lines(self, lines: list[TextLine], page: int) -> G
extracted_date, extracted_date_str = extract_date(text)
if extracted_date_str:
text = text.replace(extracted_date_str, "").strip()
datetime_date = extracted_date
date = extracted_date

depth = extract_depth(text, MAX_DEPTH)
if depth:
Expand All @@ -205,11 +212,11 @@ def get_groundwater_info_from_lines(self, lines: list[TextLine], page: int) -> G
matched_lines_rect.append(line.rect)
else:
# Pattern for matching date
if not datetime_date:
if not date:
extracted_date, extracted_date_str = extract_date(text)
if extracted_date_str:
text = text.replace(extracted_date_str, "").strip()
datetime_date = extracted_date
date = extracted_date

# Pattern for matching depth (e.g., "1,48 m u.T.")
if not depth:
Expand All @@ -225,7 +232,7 @@ def get_groundwater_info_from_lines(self, lines: list[TextLine], page: int) -> G
matched_lines_rect.append(line.rect)

# If all required data is found, break early
if datetime_date and depth and elevation:
if date and depth and elevation:
break

# Get the union of all matched lines' rectangles
Expand Down Expand Up @@ -253,7 +260,7 @@ def get_groundwater_info_from_lines(self, lines: list[TextLine], page: int) -> G
# drilling date - chose the date of the document. Date needs to be extracted from the document separately)
if depth:
return GroundwaterInformationOnPage(
groundwater=GroundwaterInformation(depth=depth, measurement_date=datetime_date, elevation=elevation),
groundwater=GroundwaterInformation(depth=depth, date=date, elevation=elevation),
rect=rect_union,
page=page,
)
Expand Down
4 changes: 2 additions & 2 deletions src/stratigraphy/groundwater/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ def extract_date(text: str) -> tuple[date | None, str | None]:

for date_format in ("%d.%m.%Y", "%d.%m.%y"):
try:
measurement_date = datetime.strptime(date_str, date_format).date()
return measurement_date, date_str
date = datetime.strptime(date_str, date_format).date()
return date, date_str
except ValueError:
continue

Expand Down
10 changes: 5 additions & 5 deletions src/stratigraphy/util/predictions.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def evaluate_groundwater(self, groundwater_ground_truth: list):
gt_groundwater = [
GroundwaterInformation.from_json_values(
depth=json_gt_data["depth"],
measurement_date=json_gt_data["date"],
date=json_gt_data["date"],
elevation=json_gt_data["elevation"],
)
for json_gt_data in groundwater_ground_truth
Expand All @@ -289,12 +289,12 @@ def evaluate_groundwater(self, groundwater_ground_truth: list):
[
(
entry.groundwater.depth,
entry.groundwater.format_measurement_date(),
entry.groundwater.format_date(),
entry.groundwater.elevation,
)
for entry in self.groundwater_entries
],
[(entry.depth, entry.format_measurement_date(), entry.elevation) for entry in gt_groundwater],
[(entry.depth, entry.format_date(), entry.elevation) for entry in gt_groundwater],
)
self.groundwater_is_correct["groundwater_depth"] = self.count_against_ground_truth(
[entry.groundwater.depth for entry in self.groundwater_entries],
Expand All @@ -305,8 +305,8 @@ def evaluate_groundwater(self, groundwater_ground_truth: list):
[entry.elevation for entry in gt_groundwater],
)
self.groundwater_is_correct["groundwater_date"] = self.count_against_ground_truth(
[entry.groundwater.measurement_date for entry in self.groundwater_entries],
[entry.measurement_date for entry in gt_groundwater],
[entry.groundwater.date for entry in self.groundwater_entries],
[entry.date for entry in gt_groundwater],
)

@staticmethod
Expand Down

1 comment on commit 8c8cd3e

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
| File | Stmts | Miss | Cover | Missing |
|---|---|---|---|---|
| **src/stratigraphy** | | | | |
|    __init__.py | 8 | 1 | 88% | 11 |
|    extract.py | 188 | 188 | 0% | 3–491 |
|    get_files.py | 19 | 19 | 0% | 3–47 |
|    line_detection.py | 26 | 26 | 0% | 3–76 |
|    main.py | 119 | 119 | 0% | 3–274 |
| **src/stratigraphy/coordinates** | | | | |
|    coordinate_extraction.py | 108 | 5 | 95% | 30, 64, 83–84, 96 |
| **src/stratigraphy/data_extractor** | | | | |
|    data_extractor.py | 50 | 3 | 94% | 32, 62, 98 |
| **src/stratigraphy/util** | | | | |
|    boundarydepthcolumnvalidator.py | 41 | 20 | 51% | 47, 57, 60, 81–84, 110–128, 140–149 |
|    dataclasses.py | 32 | 3 | 91% | 37–39 |
|    depthcolumn.py | 194 | 64 | 67% | 26, 30, 51, 57, 60–61, 85, 88, 95, 102, 110–111, 121, 138–154, 192, 229, 248–256, 267, 272, 279, 310, 315–322, 337–338, 381–423 |
|    depthcolumnentry.py | 28 | 6 | 79% | 17, 21, 36, 39, 56, 65 |
|    description_block_splitter.py | 70 | 2 | 97% | 25, 140 |
|    draw.py | 117 | 117 | 0% | 3–349 |
|    duplicate_detection.py | 51 | 51 | 0% | 3–146 |
|    extract_text.py | 31 | 4 | 87% | 20, 36, 57–58 |
|    find_depth_columns.py | 91 | 6 | 93% | 42–43, 73, 86, 180–181 |
|    find_description.py | 63 | 28 | 56% | 27–35, 50–63, 79–95, 172–175 |
|    geometric_line_utilities.py | 86 | 2 | 98% | 82, 132 |
|    interval.py | 104 | 55 | 47% | 25–28, 33–36, 42, 48, 52, 62–64, 101–147, 168, 174–190 |
|    language_detection.py | 18 | 18 | 0% | 3–45 |
|    layer_identifier_column.py | 91 | 91 | 0% | 3–234 |
|    line.py | 51 | 4 | 92% | 26, 51, 61, 111 |
|    linesquadtree.py | 46 | 1 | 98% | 76 |
|    plot_utils.py | 43 | 43 | 0% | 3–120 |
|    predictions.py | 154 | 154 | 0% | 3–364 |
|    textblock.py | 80 | 9 | 89% | 29, 57, 65, 90, 102, 125, 146, 155, 184 |
|    util.py | 39 | 17 | 56% | 22, 40–47, 61–63, 87–88, 100–104 |
| **TOTAL** | 1948 | 1056 | 46% | |

| Tests | Skipped | Failures | Errors | Time |
|---|---|---|---|---|
| 79 | 0 💤 | 0 ❌ | 0 🔥 | 5.569s ⏱️ |

Please sign in to comment.