Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Close #LGVISIUM-74: Update the API based on the new s3 specs & feedback #79

Merged
2 commits merged on Sep 11, 2024 (the source and target branch names were not captured in this extract)
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/app/api/v1/endpoints/create_pngs.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def create_pngs(aws_filename: Path):
pix = page.get_pixmap(matrix=fitz.Matrix(3, 3))
png_filename = f"{filename}-{page_number + 1}.png"
png_path = f"/tmp/{png_filename}" # Local path to save the PNG
s3_bucket_png_path = f"pngs/{png_filename}"
s3_bucket_png_path = f"dataextraction/{png_filename}"

pix.save(png_path)

Expand Down
12 changes: 6 additions & 6 deletions src/app/common/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def load_pdf_from_aws(filename: Path) -> fitz.Document:
"""
# Load the PDF from the S3 object
try:
data = load_data_from_aws(filename, "pdfs")
data = load_data_from_aws(filename)
pdf_document = fitz.open(stream=data, filetype="pdf")
except Exception:
raise HTTPException(
Expand All @@ -45,7 +45,7 @@ def load_png_from_aws(filename: Path) -> np.ndarray:
Returns:
ndarray: The loaded PNG image.
"""
data = load_data_from_aws(filename, "pngs")
data = load_data_from_aws(filename, "dataextraction")

# Convert the PNG data to an image using PIL
image = Image.open(io.BytesIO(data))
Expand All @@ -54,21 +54,21 @@ def load_png_from_aws(filename: Path) -> np.ndarray:
return np.array(image)


def load_data_from_aws(filename: Path, format: str) -> bytes:
def load_data_from_aws(filename: Path, prefix: str = "") -> bytes:
"""Load a document from AWS S3.

Args:
filename (str): The filename of the PNG image.
format (str): The format of the file.
prefix (str): The prefix of the file in the bucket.

Returns:
bytes: The loaded PNG image.
"""
# Check if the PNG exists in S3
try:
png_object = s3_client.get_object(Bucket=config.bucket_name, Key=str(format / filename))
png_object = s3_client.get_object(Bucket=config.bucket_name, Key=str(prefix / filename))
except s3_client.exceptions.NoSuchKey:
raise HTTPException(status_code=404, detail=f"Document {format + filename} not found in S3 bucket.") from None
raise HTTPException(status_code=404, detail=f"Document {prefix + filename} not found in S3 bucket.") from None

# Load the PNG from the S3 object
try:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_create_pngs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
from botocore.exceptions import ClientError
from fastapi.testclient import TestClient

TEST_PDF_KEY = "pdfs/sample.pdf"
TEST_PDF_KEY = "sample.pdf"
TEST_PDF_PATH = Path(__file__).parent.parent / "example" / "example_borehole_profile.pdf"
TEST_PNG_KEY = "pngs/sample-1.png"
TEST_PNG_KEY = "dataextraction/sample-1.png"
TEST_PNG_PATH = Path(__file__).parent.parent / "example" / "sample-1.png"


Expand Down
4 changes: 2 additions & 2 deletions tests/test_data_extraction_from_bbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
from app.common.schemas import ExtractDataRequest, FormatTypes
from fastapi.testclient import TestClient

TEST_PDF_KEY = Path("pdfs/sample.pdf")
TEST_PDF_KEY = Path("sample.pdf")
TEST_PDF_PATH = Path(__file__).parent.parent / "example" / "example_borehole_profile.pdf"
TEST_PNG_KEY = Path("pngs/sample-1.png")
TEST_PNG_KEY = Path("dataextraction/sample-1.png")
TEST_PNG_PATH = Path(__file__).parent.parent / "example" / "sample-1.png"


Expand Down
Loading