Skip to content

Commit

Permalink
LGVISIUM-102: LayerIdentifierColumn extends from DepthColumn
Browse the repository at this point in the history
  • Loading branch information
stijnvermeeren-swisstopo committed Nov 11, 2024
1 parent 45e696c commit 203e11d
Show file tree
Hide file tree
Showing 15 changed files with 302 additions and 568 deletions.
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ dependencies = [
"boto3",
"pandas",
"levenshtein",
"pathlib",
"python-dotenv",
"setuptools",
"tqdm",
Expand Down
6 changes: 3 additions & 3 deletions src/stratigraphy/annotations/draw.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from dotenv import load_dotenv
from stratigraphy.data_extractor.data_extractor import FeatureOnPage
from stratigraphy.depthcolumn.depthcolumn import DepthColumn
from stratigraphy.depths_materials_column_pairs.depths_materials_column_pairs import DepthsMaterialsColumnPairs
from stratigraphy.depths_materials_column_pairs.depths_materials_column_pairs import DepthsMaterialsColumnPair
from stratigraphy.groundwater.groundwater_extraction import Groundwater
from stratigraphy.layer.layer import Layer
from stratigraphy.metadata.coordinate_extraction import Coordinate
Expand Down Expand Up @@ -98,7 +98,7 @@ def draw_predictions(
draw_depth_columns_and_material_rect(
shape,
page.derotation_matrix,
[pair for pair in depths_materials_column_pairs if pair.page == page_number],
[pair for pair in depths_materials_column_pairs if pair.depth_column.page == page_number],
)
draw_material_descriptions(
shape,
Expand Down Expand Up @@ -245,7 +245,7 @@ def draw_material_descriptions(shape: fitz.Shape, derotation_matrix: fitz.Matrix


def draw_depth_columns_and_material_rect(
shape: fitz.Shape, derotation_matrix: fitz.Matrix, depths_materials_column_pairs: list[DepthsMaterialsColumnPairs]
shape: fitz.Shape, derotation_matrix: fitz.Matrix, depths_materials_column_pairs: list[DepthsMaterialsColumnPair]
):
"""Draw depth columns as well as the material rects on a pdf page.
Expand Down
14 changes: 6 additions & 8 deletions src/stratigraphy/depthcolumn/boundarydepthcolumnvalidator.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,27 +63,26 @@ def is_valid(self, column: BoundaryDepthColumn, corr_coef_threshold: float = 0.9

return corr_coef and corr_coef > corr_coef_threshold

def reduce_until_valid(self, column: BoundaryDepthColumn, page_number: int) -> BoundaryDepthColumn:
def reduce_until_valid(self, column: BoundaryDepthColumn) -> BoundaryDepthColumn:
"""Removes entries from the depth column until it fulfills the is_valid condition.
is_valid checks whether there is too much noise (i.e. other text) in the column and whether the entries are
linearly correlated with their vertical position.
Args:
column (BoundaryDepthColumn): The depth column to validate
page_number (int): The page number of the depth column
Returns:
BoundaryDepthColumn: The current depth column with entries removed until it is valid.
"""
while column:
if self.is_valid(column):
return column
elif self.correct_OCR_mistakes(column, page_number) is not None:
return self.correct_OCR_mistakes(column, page_number)
elif self.correct_OCR_mistakes(column) is not None:
return self.correct_OCR_mistakes(column)
else:
column = column.remove_entry_by_correlation_gradient()

def correct_OCR_mistakes(self, column: BoundaryDepthColumn, page_number: int) -> BoundaryDepthColumn | None:
def correct_OCR_mistakes(self, column: BoundaryDepthColumn) -> BoundaryDepthColumn | None:
"""Corrects OCR mistakes in the depth column entries.
Loops through all values and corrects common OCR mistakes for the given entry. Then, the column with the
Expand All @@ -102,15 +101,14 @@ def correct_OCR_mistakes(self, column: BoundaryDepthColumn, page_number: int) ->
Args:
column (BoundaryDepthColumn): The depth column to validate
page_number (int): The page number of the depth column
Returns:
BoundaryDepthColumn | None: The corrected depth column, or None if no correction was possible.
"""
new_columns = [BoundaryDepthColumn()]
new_columns = [BoundaryDepthColumn(entries=[], page=column.page)]
for entry in column.entries:
new_columns = [
BoundaryDepthColumn([*column.entries, DepthColumnEntry(entry.rect, new_value, page_number)])
BoundaryDepthColumn([*column.entries, DepthColumnEntry(entry.rect, new_value)], page=column.page)
for column in new_columns
for new_value in _value_alternatives(entry.value)
]
Expand Down
Loading

0 comments on commit 203e11d

Please sign in to comment.