Skip to content

Commit

Permalink
Improve depth entry recognition for cases such as '.80'.
Browse files Browse the repository at this point in the history
  • Loading branch information
redur committed Jun 24, 2024
1 parent b96efa8 commit 8132e75
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/stratigraphy/util/boundarydepthcolumnvalidator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""This module contains logic to validate BoundaryDepthColumn instances."""

import dataclasses
import re

from stratigraphy.util.depthcolumn import BoundaryDepthColumn
from stratigraphy.util.depthcolumnentry import DepthColumnEntry
Expand Down Expand Up @@ -129,4 +130,11 @@ def _correct_entry(entry: DepthColumnEntry) -> DepthColumnEntry:
"""
text_value = str(entry.value)
text_value = text_value.replace("4", "1") # In older documents, OCR sometimes mistakes 1 for 4

# Replace a pattern such as '.80' with '0.80'. These cases are already converted
# to '80.0' when depth entries are recognized. We therefore look for values such as '80.0'
# that consist of a single digit followed by '0.0', and rewrite them as, e.g., '0.80'.
if re.match(r"^[0-9]0\.0$", text_value):
text_value = text_value.replace(".", "")
text_value = "0." + text_value
return DepthColumnEntry(entry.rect, float(text_value))

0 comments on commit 8132e75

Please sign in to comment.