From 48717ec2a818c1bdb96320ff258eee90b6edd33b Mon Sep 17 00:00:00 2001 From: Renato Durrer Date: Fri, 3 May 2024 09:25:34 +0200 Subject: [PATCH] Improve deduplication of lines; intermediate commit for debugging. --- src/stratigraphy/line_detection.py | 5 ++--- src/stratigraphy/util/dataclasses.py | 16 +++++++++++++--- .../util/geometric_line_utilities.py | 7 +++++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/stratigraphy/line_detection.py b/src/stratigraphy/line_detection.py index 411c4bd2..ac01aac1 100644 --- a/src/stratigraphy/line_detection.py +++ b/src/stratigraphy/line_detection.py @@ -54,9 +54,7 @@ def detect_lines_lsd(page: fitz.Page, scale_factor=2, lsd_params=None) -> ArrayL # Detect lines in the image lines = lsd.detect(gray)[0] - converted_lines = [line_from_array(line, scale_factor) for line in lines] - deduplicated_lines = deduplicate_lines(converted_lines) - return deduplicated_lines + return [line_from_array(line, scale_factor) for line in lines] def extract_lines(page: fitz.Page, line_detection_params: dict) -> list[Line]: @@ -75,6 +73,7 @@ def extract_lines(page: fitz.Page, line_detection_params: dict) -> list[Line]: scale_factor=line_detection_params["pdf_scale_factor"], ) lines = drop_vertical_lines(lines, threshold=line_detection_params["vertical_lines_threshold"]) + lines = deduplicate_lines(lines) merging_params = line_detection_params["line_merging_params"] if merging_params["use_clustering"]: lines = merge_parallel_lines_approximately( diff --git a/src/stratigraphy/util/dataclasses.py b/src/stratigraphy/util/dataclasses.py index c8f0c7ef..f6564015 100644 --- a/src/stratigraphy/util/dataclasses.py +++ b/src/stratigraphy/util/dataclasses.py @@ -71,6 +71,16 @@ def remove(self, line_index: str): del self.hashmap[line_index] def add(self, line: Line) -> str: - key = uuid.uuid4().hex - self.hashmap[key] = line - return key + if not self._check_if_present(line): + key = uuid.uuid4().hex + self.hashmap[key] = line + return key + else: + logger.warning("Line already present in IndexedLines.") + return None + + def _check_if_present(self, line: Line) -> bool: + return any( + value.start.distance_to(line.start) < 0.1 and value.end.distance_to(line.end) < 0.1 + for _key, value in self.hashmap.items() + ) diff --git a/src/stratigraphy/util/geometric_line_utilities.py b/src/stratigraphy/util/geometric_line_utilities.py index 4c9bd877..6e0a8ddf 100644 --- a/src/stratigraphy/util/geometric_line_utilities.py +++ b/src/stratigraphy/util/geometric_line_utilities.py @@ -37,8 +37,10 @@ def deduplicate_lines(lines: list[Line]) -> list[Line]: return deduplicated_lines -def _check_if_present(lines, line: Line) -> bool: - return any(value.start == line.start and value.end == line.end for value in lines) +def _check_if_present(lines: list[Line], line: Line) -> bool: + return any( + value.start.distance_to(line.start) < 0.1 and value.end.distance_to(line.end) < 0.1 for value in lines + ) # we are on a pixel grid and 0.1 is a reasonable threshold def drop_vertical_lines(lines: list[Line], threshold: float = 0.1) -> ArrayLike: @@ -476,6 +478,7 @@ def merge_parallel_lines_quadtree(lines: list[Line], tol: int, angle_threshold: merged_any = True continue if merged_any: + print("Starting recursion.") return merge_parallel_lines_quadtree( list(indexed_lines.hashmap.values()), tol=tol, angle_threshold=angle_threshold )