Skip to content

Commit

Permalink
merge
Browse files (browse the repository at this point in the history)
  • Loading branch information
stijnvermeeren-swisstopo committed May 22, 2024
2 parents edb3e04 + ddf82d1 commit 4b2cc38
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 18 deletions.
21 changes: 12 additions & 9 deletions src/stratigraphy/util/find_depth_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ def depth_column_entries(all_words: list[TextWord], include_splits: bool) -> lis
entries.append(DepthColumnEntry(word.rect, value))
elif include_splits:
# support for e.g. "1.10-1.60m" extracted as a single word
entries.extend(extract_layer_depth_interval_entries(input_string, word.rect))
layer_depth_column_entry = extract_layer_depth_interval_entries(input_string, word.rect)
entries.extend(
[layer_depth_column_entry.start, layer_depth_column_entry.end] if layer_depth_column_entry else []
)
except ValueError:
pass
return entries
Expand All @@ -46,8 +49,8 @@ def value_as_float(string_value: str) -> float: # noqa: D103

def extract_layer_depth_interval_entries(
text: str, rect: fitz.Rect, require_start_of_string: bool = True
) -> list[DepthColumnEntry]:
"""Extracts two DepthColumnEntry objects from a string to represent a layer depth interval.
) -> LayerDepthColumnEntry | None:
"""Extracts a LayerDepthColumnEntry from a string.
Args:
text (str): The string to extract the depth interval from.
Expand All @@ -56,10 +59,10 @@ def extract_layer_depth_interval_entries(
at the start of a string. Defaults to True.
Returns:
list[DepthColumnEntry]: The extracted depth column entries. Either two entries or an empty list.
LayerDepthColumnEntry | None: The extracted LayerDepthColumnEntry or None if none is found.
"""
input_string = text.strip().replace(",", ".")
entries = []

query = r"-?([0-9]+(\.[0-9]+)?)[müMN\]*[\s-]+([0-9]+(\.[0-9]+)?)[müMN\\.]*"
if not require_start_of_string:
query = r".*?" + query
Expand All @@ -68,13 +71,13 @@ def extract_layer_depth_interval_entries(
if match:
value1 = value_as_float(match.group(1))
first_half_rect = fitz.Rect(rect.x0, rect.y0, rect.x1 - rect.width / 2, rect.y1)
entries.append(DepthColumnEntry(first_half_rect, value1))

value2 = value_as_float(match.group(3))
second_half_rect = fitz.Rect(rect.x0 + rect.width / 2, rect.y0, rect.x1, rect.y1)
entries.append(DepthColumnEntry(second_half_rect, value2))

return entries
return LayerDepthColumnEntry(
DepthColumnEntry(first_half_rect, value1), DepthColumnEntry(second_half_rect, value2)
)
return None


def find_layer_depth_columns(entries: list[DepthColumnEntry], all_words: list[TextWord]) -> list[LayerDepthColumn]:
Expand Down
16 changes: 7 additions & 9 deletions src/stratigraphy/util/layer_identifier_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import re

import fitz
import numpy as np

from stratigraphy.util.depthcolumn import LayerDepthColumnEntry
from stratigraphy.util.find_depth_columns import extract_layer_depth_interval_entries
Expand Down Expand Up @@ -133,21 +132,20 @@ def get_depth_interval(self, block: TextBlock) -> LayerDepthColumnEntry:
depth_entries = []
for line in block.lines:
try:
new_entries = extract_layer_depth_interval_entries(line.text, line.rect, require_start_of_string=False)
layer_depth_entry = extract_layer_depth_interval_entries(
line.text, line.rect, require_start_of_string=False
)
# require_start_of_string = False because the depth interval may not always start at the beginning
# of the line e.g. "Remblais Heterogene: 0.00 - 0.5m"
if new_entries:
depth_entries.append(new_entries)
if layer_depth_entry:
depth_entries.append(layer_depth_entry)
except ValueError:
pass

if depth_entries:
# Merge the sub layers into one depth interval.
start_idx = np.argmin([entry[0].value for entry in depth_entries])
end_idx = np.argmax([entry[1].value for entry in depth_entries])

start = depth_entries[start_idx][0]
end = depth_entries[end_idx][1]
start = min([entry.start for entry in depth_entries], key=lambda start_entry: start_entry.value)
end = max([entry.end for entry in depth_entries], key=lambda end_entry: end_entry.value)

return LayerDepthColumnEntry(start, end)
else:
Expand Down

0 comments on commit 4b2cc38

Please sign in to comment.