Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Close #LGVISIUM-77: Wrong elevation value selected within bounding box #91

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/stratigraphy/data_extractor/data_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,11 @@ def get_lines_near_key(self, lines, key_line: TextLine) -> list[TextLine]:
)
feature_lines = [line for line in lines if line.rect.intersects(elevation_search_rect)]

# Make sure the line with the key is included first in the extracted information and that duplicates are removed
# Insert key_line first and remove duplicates
feature_lines.insert(0, key_line)
return list(dict.fromkeys(feature_lines))
feature_lines = list(dict.fromkeys(feature_lines))

# Sort by vertical distance between the top of the feature line and the top of key_line
feature_lines_sorted = sorted(feature_lines, key=lambda line: abs(line.rect.y0 - key_line.rect.y0))

return feature_lines_sorted
2 changes: 1 addition & 1 deletion src/stratigraphy/groundwater/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def extract_elevation(text: str) -> float | None:
elevation_patterns = [
r"(\d+(\.\d+)?)\s*m\s*u\.m\.",
r"(\d+(\.\d+)?)\s*m\s*ur.",
r"(\d{3,}\.\d+)",
r"(\d{3,}\.\d{1,2})(?!\d)", # Matches a float with at least 3 integer digits and at most 2 (i.e. 1 or 2) digits after the decimal point
r"(\d{3,})\s*m",
]

Expand Down
20 changes: 10 additions & 10 deletions src/stratigraphy/metadata/elevation_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,35 +89,35 @@ def get_elevation_near_key(self, lines: list[TextLine], page: int) -> Elevation
"""
# find the key that indicates the elevation information
elevation_key_lines = self.find_feature_key(lines)
extracted_elevation_informations = []
extracted_elevation_list = []

for elevation_key_line in elevation_key_lines:
elevation_lines = self.get_lines_near_key(lines, elevation_key_line)
elevation_lines = self.get_lines_near_key(lines, elevation_key_line) # Check the sorting of the lines

try:
extracted_elevation_information = self.get_elevation_from_lines(elevation_lines, page)
if extracted_elevation_information.elevation:
extracted_elevation_informations.append(extracted_elevation_information)
extracted_elevation = self.get_elevation_from_lines(elevation_lines, page)
if extracted_elevation.elevation:
extracted_elevation_list.append(extracted_elevation)
except ValueError as error:
logger.warning("ValueError: %s", error)
logger.warning("Could not extract all required information from the lines provided.")

return self.select_best_elevation_information(extracted_elevation_informations)
return self.select_best_elevation_information(extracted_elevation_list)

def select_best_elevation_information(self, extracted_elevation_informations: list[Elevation]) -> Elevation | None:
def select_best_elevation_information(self, extracted_elevation_list: list[Elevation]) -> Elevation | None:
"""Select the best elevation information from a list of extracted elevation information.

Args:
extracted_elevation_informations (list[Elevation]): A list of extracted elevation information.
extracted_elevation_list (list[Elevation]): A list of extracted elevation information.

Returns:
Elevation | None: The best extracted elevation information.
"""
# Sort the extracted elevation information by elevation with the highest elevation first
extracted_elevation_informations.sort(key=lambda x: x.elevation, reverse=True)
extracted_elevation_list.sort(key=lambda x: x.elevation, reverse=True)

# Return the first element of the sorted list
return extracted_elevation_informations[0] if extracted_elevation_informations else None
return extracted_elevation_list[0] if extracted_elevation_list else None

def get_elevation_from_lines(self, lines: list[TextLine], page: int) -> Elevation:
r"""Matches the elevation in a string of text.
Expand Down
Loading