Skip to content

Commit

Permalink
Tune arithmetic progression threshold.
Browse files: browse the repository at this point in the history
  • Loading branch information
redur committed Jun 20, 2024
1 parent 7e97b11 commit a8e3002
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
14 changes: 9 additions & 5 deletions src/stratigraphy/util/depthcolumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,11 +265,11 @@ def significant_arithmetic_progression(self) -> bool:
if len(self.entries) < 6:
return self.is_arithmetic_progression()
else:
# to allow for OCR errors or gaps in the progression, we only require a segment of length 6 that is an
# to allow for OCR errors or gaps in the progression, we only require a segment of length 7 that is an
# arithmetic progression
for i in range(len(self.entries) - 6 + 1):
for i in range(len(self.entries) - 7 + 1):
if BoundaryDepthColumn(
self.noise_count_threshold, self.noise_count_offset, self.entries[i : i + 6]
self.noise_count_threshold, self.noise_count_offset, self.entries[i : i + 7]
).is_arithmetic_progression():
return True
return False
Expand Down Expand Up @@ -322,10 +322,14 @@ def is_valid(self, all_words: list[TextLine]) -> bool:
):
return False
# Check if the entries are strictly increasing.
if not all(i.value < j.value for i, j in zip(self.entries, self.entries[1:], strict=False)):
if (
not all(i.value < j.value for i, j in zip(self.entries, self.entries[1:], strict=False))
and len(self.entries) < 15
):
return False

corr_coef = self.pearson_correlation_coef()

return (
corr_coef and corr_coef > np.min([1.0382 - len(self.entries) * 0.01, 0.9985]) and corr_coef > 0.95
) # Magic numbers obtained using an error analysis on critical borehole profiles. Admittedly, this may
Expand Down Expand Up @@ -396,7 +400,7 @@ def correct_typos(self, all_words) -> BoundaryDepthColumn | None:
),
)
)
best_columns = max(new_columns, key=lambda column: column[1].pearson_correlation_coef())
_removed_entry, best_columns = max(new_columns, key=lambda column: column[1].pearson_correlation_coef())

if best_columns.is_valid(all_words):
return best_columns
Expand Down
5 changes: 2 additions & 3 deletions src/stratigraphy/util/find_depth_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ def find_depth_columns(
Returns:
list[BoundaryDepthColumn]: Found BoundaryDepthColumn objects.
"""
# print(f"{entries=}")
numeric_columns: list[BoundaryDepthColumn] = []
for entry in entries:
has_match = False
Expand All @@ -186,7 +185,7 @@ def find_depth_columns(
for column in numeric_columns
if all(not other.strictly_contains(column) for other in numeric_columns)
]
# print(f"{numeric_columns=}")

numeric_columns = [
column.reduce_until_valid(all_words)
for numeric_column in numeric_columns
Expand All @@ -195,7 +194,7 @@ def find_depth_columns(
# that does not match the descriptions
if not column.significant_arithmetic_progression()
]
# print(f"{numeric_columns=}")

return sorted(
[column for column in numeric_columns if column and column.is_valid(all_words)],
key=lambda column: len(column.entries),
Expand Down

0 comments on commit a8e3002

Please sign in to comment.