diff --git a/src/stratigraphy/depthcolumn/depthcolumnentry.py b/src/stratigraphy/depthcolumn/depthcolumnentry.py index ac469bb..f20200f 100644 --- a/src/stratigraphy/depthcolumn/depthcolumnentry.py +++ b/src/stratigraphy/depthcolumn/depthcolumnentry.py @@ -3,18 +3,19 @@ from __future__ import annotations import re +from dataclasses import dataclass from typing import Any import fitz from stratigraphy.lines.line import TextWord +@dataclass class DepthColumnEntry: # noqa: D101 """Class to represent a depth column entry.""" - def __init__(self, rect: fitz.Rect, value: float): - self.rect = rect - self.value = value + rect: fitz.Rect + value: float def __repr__(self) -> str: return str(self.value) @@ -61,10 +62,10 @@ def find_in_words(cls, all_words: list[TextWord], include_splits: bool) -> list[ entries.append(DepthColumnEntry(word.rect, value)) elif include_splits: # support for e.g. "1.10-1.60m" extracted as a single word - layer_depth_column_entry = AToBDepthColumnEntry.from_text(input_string, word.rect) + a_to_b_depth_column_entry = AToBDepthColumnEntry.from_text(input_string, word.rect) entries.extend( - [layer_depth_column_entry.start, layer_depth_column_entry.end] - if layer_depth_column_entry + [a_to_b_depth_column_entry.start, a_to_b_depth_column_entry.end] + if a_to_b_depth_column_entry else [] ) except ValueError: @@ -72,12 +73,14 @@ def find_in_words(cls, all_words: list[TextWord], include_splits: bool) -> list[ return entries +@dataclass class AToBDepthColumnEntry: # noqa: D101 - """Class to represent a layer depth column entry.""" + """Class to represent a depth column entry of the form "1m - 3m".""" - def __init__(self, start: DepthColumnEntry, end: DepthColumnEntry): - self.start = start - self.end = end + # TODO do we need both this class as well as AToBInterval, or can we combine the two classes? + + start: DepthColumnEntry + end: DepthColumnEntry def __repr__(self) -> str: return f"{self.start.value}-{self.end.value}" @@ -89,11 +92,7 @@ def rect(self) -> fitz.Rect: def to_json(self) -> dict[str, Any]: """Convert the layer depth column entry to a JSON serializable format.""" - return { - "start": self.start.to_json(), - "end": self.end.to_json(), - "rect": [self.rect.x0, self.rect.y0, self.rect.x1, self.rect.y1], - } + return {"start": self.start.to_json(), "end": self.end.to_json()} @classmethod def from_json(cls, data: dict) -> AToBDepthColumnEntry: diff --git a/src/stratigraphy/extract.py b/src/stratigraphy/extract.py index 469eecf..d8beeea 100644 --- a/src/stratigraphy/extract.py +++ b/src/stratigraphy/extract.py @@ -170,6 +170,7 @@ def process_page( rect=pair.block.rect, page=page_number, ), + # TODO don't automatically convert any interval to an AAboveBInterval depth_interval=AAboveBInterval(start=pair.depth_interval.start, end=pair.depth_interval.end) if pair.depth_interval else None,