Skip to content

Commit

Permalink
Merge branch 'main' into logging
Browse files Browse the repository at this point in the history
  • Loading branch information
kimpham54 authored Oct 17, 2024
2 parents f932cce + 960dfe7 commit 682eac8
Show file tree
Hide file tree
Showing 14 changed files with 224 additions and 3,962 deletions.
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
input/*
output/*
69 changes: 69 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# CI workflow: lint with flake8, run the unit tests, measure coverage and
# compare the coverage of changed lines against the main branch.
name: Tests and Style

on: [push, pull_request]

jobs:
  build:
    strategy:
      matrix:
        # Quoted so YAML always treats the version as a string.
        python-version: ["3.11.4"]
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full history so the later steps can check out and diff against main.
          fetch-depth: 0

      # NOTE(review): env.BRANCH is not defined anywhere in this workflow, so
      # this expands to a bare `git checkout` -- confirm whether the variable
      # is supplied elsewhere or whether this step can be dropped.
      - name: Switch to Current Branch
        run: git checkout ${{ env.BRANCH }}

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install -e .

      - name: Run flake8
        run: |
          pip install flake8
          # stop the build if there are flake8 errors
          flake8 . --count --show-source --statistics

      - name: Run unit tests
        run: |
          pip install pytest
          python -m pytest src/jp2_remediator/tests/unit

      - name: Run coverage
        run: |
          pip install coverage
          python -m coverage run -p -m pytest src/jp2_remediator/tests/unit
          python -m coverage combine
          python -m coverage report -m --skip-covered
          python -m coverage xml

      # Fetch base branch for comparison (e.g., main)
      - name: Fetch base branch
        run: git fetch origin main

      # Compare coverage with the base branch
      - name: Compare coverage
        run: |
          pip install diff-cover
          git checkout main
          python -m coverage run -p -m pytest src/jp2_remediator/tests/unit
          # `run -p` writes parallel data files; combine them so the XML
          # report reflects this run instead of the earlier .coverage data.
          python -m coverage combine
          python -m coverage xml -o coverage-base.xml
          git checkout -
          # diff-cover is installed as a console script, not a Python file,
          # so it is invoked directly rather than as `python diff-cover`.
          diff-cover coverage.xml --compare-branch=main

      # Fail if coverage decreases
      - name: Fail if coverage decreases
        run: |
          diff-cover coverage.xml --compare-branch=main --fail-under=100
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
*~
*.swp

logs/

input/*
output/*
.coverage
coverage.*


dist/*
*/*.egg-info/*
__pycache__
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ pip install jp2_remediator==0.0.2
## Process all .jp2 files in the bucket:
`python3 box_reader.py --bucket remediation-folder`

##Process only files with a specific prefix (folder):
## Process only files with a specific prefix (folder):
`python3 box_reader.py --bucket remediation-folder --prefix testbatch_20240923`

`python3 box_reader.py --help`
Expand Down
14 changes: 14 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,17 @@ dependencies = []
[project.urls]
Homepage = "https://github.com/kimpham54/jp2_remediator"
Issues = "https://github.com/kimpham54/jp2_remediator/issues"

[tool.pytest.ini_options]
pythonpath = [
".", "src"
]

[tool.coverage.run]
omit = [
"**/tests/*"
]

[tool.project-paths]
dir_unit_out = "src/jp2_remediator/tests/out/"
dir_unit_resources = "src/jp2_remediator/tests/resources/"
10 changes: 10 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
boto3==1.35.39
botocore==1.35.39
jmespath==1.0.1
jpylyzer==2.2.1
project-paths==1.1.1
python-dateutil==2.9.0.post0
s3transfer==0.10.3
six==1.16.0
toml==0.10.2
urllib3==2.2.3
128 changes: 89 additions & 39 deletions src/jp2_remediator/box_reader.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import sys
# import sys
import os
import argparse
import boto3
import datetime
from jp2_remediator import configure_logger
from jpylyzer import jpylyzer
# from jpylyzer import jpylyzer
from jpylyzer import boxvalidator
from jpylyzer import byteconv

# from jpylyzer import byteconv


class BoxReader:
def __init__(self, file_path):
Expand All @@ -19,16 +22,22 @@ def __init__(self, file_path):
def read_file(self, file_path):
    """Return the raw bytes stored at ``file_path``.

    The file is opened in binary mode and read in full. If opening or
    reading raises ``IOError``, the error is logged through
    ``self.logger`` and ``None`` is returned instead.
    """
    try:
        with open(file_path, mode="rb") as handle:
            data = handle.read()
    except IOError as err:
        self.logger.error(f"Error reading file {file_path}: {err}")
        return None
    return data

def initialize_validator(self):
    """Create, run and return the jpylyzer BoxValidator for this file.

    The validator is built over ``self.file_contents`` for the "JP2" box
    with JP2 validation and verbose output enabled, stored on
    ``self.validator``, and ``validate()`` is called on it before it is
    returned.
    """
    validator_options = dict(
        validationFormat="jp2",
        verboseFlag=True,
        nullxmlFlag=False,
        packetmarkersFlag=False,
    )
    validator = boxvalidator.BoxValidator(
        validator_options, "JP2", self.file_contents
    )
    # Store before validating so the attribute is set even if validate() raises.
    self.validator = validator
    validator.validate()
    return validator

Expand All @@ -37,14 +46,18 @@ def find_box_position(self, box_hex):
return self.file_contents.find(box_hex)

def check_boxes(self):
"""Checks for presence of 'jp2h' and 'colr' boxes in the file contents."""
jp2h_position = self.find_box_position(b'\x6a\x70\x32\x68') # search hex for 'jp2h'
"""Checks for presence of 'jp2h' and 'colr' boxes in file contents."""
jp2h_position = self.find_box_position(
b"\x6a\x70\x32\x68"
) # search hex for 'jp2h'
if jp2h_position != -1:
self.logger.debug(f"'jp2h' found at byte position: {jp2h_position}")
else:
self.logger.debug("'jp2h' not found in the file.")

colr_position = self.find_box_position(b'\x63\x6f\x6c\x72') # search hex for 'colr'
colr_position = self.find_box_position(
b"\x63\x6f\x6c\x72"
) # search hex for 'colr'
if colr_position != -1:
self.logger.debug(f"'colr' found at byte position: {colr_position}")
else:
Expand Down Expand Up @@ -77,7 +90,11 @@ def process_colr_box(self, colr_position):

return header_offset_position

def process_trc_tag(self, trc_hex, trc_name, new_contents, header_offset_position):
def process_trc_tag(self,
trc_hex,
trc_name,
new_contents,
header_offset_position):
"""Processes the TRC tag and modifies contents if necessary."""
trc_position = new_contents.find(trc_hex)
if trc_position == -1:
Expand All @@ -102,25 +119,35 @@ def process_trc_tag(self, trc_hex, trc_name, new_contents, header_offset_positio
self.logger.debug(f"Cannot calculate 'curv_{trc_name}_position' due to an unrecognized 'meth' value.")
return new_contents

curv_trc_position = trc_tag_offset + header_offset_position # start of curv profile data
curv_profile = new_contents[curv_trc_position:curv_trc_position + 12] # 12-byte curv profile data length
curv_trc_position = (
trc_tag_offset + header_offset_position
) # start of curv profile data
curv_profile = new_contents[
curv_trc_position: curv_trc_position + 12
] # 12-byte curv profile data length

if len(curv_profile) < 12:
self.logger.debug(f"Could not read the full 'curv' profile data for {trc_name}.")
return new_contents

curv_signature = curv_profile[0:4].decode('utf-8') # ICC.1:2022 Table 35 tag signature
curv_reserved = int.from_bytes(curv_profile[4:8], byteorder='big') # ICC.1:2022 Table 35 reserved 0's
curv_trc_gamma_n = int.from_bytes(curv_profile[8:12], byteorder='big') # # ICC.1:2022 Table 35 n value
curv_signature = curv_profile[0:4].decode(
"utf-8"
) # ICC.1:2022 Table 35 tag signature
curv_reserved = int.from_bytes(
curv_profile[4:8], byteorder="big"
) # ICC.1:2022 Table 35 reserved 0's
curv_trc_gamma_n = int.from_bytes(
curv_profile[8:12], byteorder="big"
) # # ICC.1:2022 Table 35 n value

self.logger.debug(f"'curv' Profile Signature for {trc_name}: {curv_signature}")
self.logger.debug(f"'curv' Reserved Value: {curv_reserved}")
self.logger.debug(f"'curv_{trc_name}_gamma_n' Value: {curv_trc_gamma_n}")

curv_trc_field_length = curv_trc_gamma_n * 2 + 12 # ICC.1:2022 Table 35 2n field length
self.logger.debug(f"'curv_{trc_name}_field_length': {curv_trc_field_length}")

# Check if curv_trc_gamma_n is not 1 and ask for confirmation to proceed, loops through all TRC tags
"""Check if curv_trc_gamma_n is not 1 and ask
for confirmation to proceed, loops through all TRC tags"""
if curv_trc_gamma_n != 1:
self.logger.warning(f"Warning: 'curv_{trc_name}_gamma_n' value is {curv_trc_gamma_n}, expected 1.")
proceed = input(f"Do you want to proceed with fixing the file {self.file_path}? (y/n): ").lower()
Expand All @@ -132,29 +159,33 @@ def process_trc_tag(self, trc_hex, trc_name, new_contents, header_offset_positio
self.logger.warning(f"'{trc_name}' Tag Size ({trc_tag_size}) does not match 'curv_{trc_name}_field_length' ({curv_trc_field_length}). Modifying the size...")
new_trc_size_bytes = curv_trc_field_length.to_bytes(4, byteorder='big')
new_contents[trc_position + 8: trc_position + 12] = new_trc_size_bytes

return new_contents

def process_all_trc_tags(self, header_offset_position):
    """Run ``process_trc_tag`` for the rTRC, gTRC and bTRC tags, in order.

    Works on a mutable ``bytearray`` copy of ``self.file_contents``; the
    result of each tag's processing is fed into the next call. Returns
    the contents produced by the last call.
    """
    contents = bytearray(self.file_contents)
    for tag_name in ("rTRC", "gTRC", "bTRC"):
        # The 4-byte tag signature is simply the ASCII form of the tag name.
        tag_signature = tag_name.encode("ascii")
        contents = self.process_trc_tag(
            tag_signature, tag_name, contents, header_offset_position
        )
    return contents

def write_modified_file(self, new_file_contents):
"""Writes the modified file contents to a new file if changes were made."""
"""Writes modified file contents to new file if changes were made."""
if new_file_contents != self.file_contents:
timestamp = datetime.datetime.now().strftime("%Y%m%d") # use "%Y%m%d_%H%M%S" for more precision
new_file_path = self.file_path.replace(".jp2", f"_modified_{timestamp}.jp2")
with open(new_file_path, 'wb') as new_file:
timestamp = datetime.datetime.now().strftime(
"%Y%m%d"
) # use "%Y%m%d_%H%M%S" for more precision
new_file_path = self.file_path.replace(
".jp2", f"_modified_{timestamp}.jp2")
with open(new_file_path, "wb") as new_file:
new_file.write(new_file_contents)
self.logger.info(f"New JP2 file created with modifications: {new_file_path}")
else:
Expand All @@ -174,40 +205,59 @@ def read_jp2_file(self):

self.write_modified_file(new_file_contents)


def process_directory(directory_path):
    """Walk ``directory_path`` recursively and process every JP2 file.

    A file is selected when its name ends with ``.jp2`` (compared
    case-insensitively). Each match is announced on stdout and passed to
    ``BoxReader.read_jp2_file``.
    """
    for current_dir, _subdirs, filenames in os.walk(directory_path):
        for filename in filenames:
            if not filename.lower().endswith(".jp2"):
                continue
            jp2_path = os.path.join(current_dir, filename)
            print(f"Processing file: {jp2_path}")
            BoxReader(jp2_path).read_jp2_file()

def process_s3_bucket(bucket_name, prefix=""):
    """Process all JP2 files in a given S3 bucket.

    Every object under ``prefix`` whose key ends in ``.jp2``
    (case-insensitive) is downloaded to ``/tmp`` and run through
    ``BoxReader.read_jp2_file``. If that left a
    ``*_modified_<YYYYMMDD>.jp2`` file next to the download, the file is
    uploaded back to the bucket under the correspondingly renamed key.

    Args:
        bucket_name: Name of the S3 bucket to scan.
        prefix: Key prefix that restricts the listing. ``None`` is
            treated as "no prefix".
    """
    s3 = boto3.client("s3")

    # A single list_objects_v2 call returns at most 1000 keys, so page
    # through the listing. ``prefix or ""`` guards against None, which
    # boto3's parameter validation rejects.
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix or ""):
        for obj in page.get("Contents", []):
            file_path = obj["Key"]
            if not file_path.lower().endswith(".jp2"):
                continue

            print(f"Processing file: {file_path} from bucket {bucket_name}")
            download_path = f"/tmp/{os.path.basename(file_path)}"
            s3.download_file(bucket_name, file_path, download_path)
            reader = BoxReader(download_path)
            reader.read_jp2_file()

            # Same naming scheme as BoxReader.write_modified_file;
            # use "%Y%m%d_%H%M%S" in both places for more precision.
            timestamp = datetime.datetime.now().strftime("%Y%m%d")
            suffix = f"_modified_{timestamp}.jp2"
            modified_path = download_path.replace(".jp2", suffix)

            # Upload only when a distinct modified file really exists:
            # - the reader writes nothing if the contents were unchanged;
            # - for keys ending in e.g. ".JP2" the case-sensitive replace
            #   is a no-op, and uploading would overwrite the source object.
            if (modified_path == download_path
                    or not os.path.exists(modified_path)):
                print(f"No modified file to upload for {file_path}")
                continue

            s3.upload_file(
                modified_path,
                bucket_name,
                file_path.replace(".jp2", suffix),
            )


if __name__ == "__main__":
parser = argparse.ArgumentParser(description="JP2 file processor")
parser.add_argument("--file", help="Path to a single JP2 file to process.")
parser.add_argument("--directory", help="Path to a directory of JP2 files to process.")
parser.add_argument("--bucket", help="Name of the AWS S3 bucket to process JP2 files from.")
parser.add_argument("--prefix", help="Prefix of files in the AWS S3 bucket (optional).")
parser.add_argument(
"--directory", help="Path to a directory of JP2 files to process."
)
parser.add_argument(
"--bucket", help="Name of the AWS S3 bucket to process JP2 files from."
)
parser.add_argument(
"--prefix", help="Prefix of files in the AWS S3 bucket (optional)."
)

args = parser.parse_args()

Expand All @@ -219,4 +269,4 @@ def process_s3_bucket(bucket_name, prefix=''):
elif args.bucket:
process_s3_bucket(args.bucket, args.prefix)
else:
print("Please specify either --file, --directory, or --bucket.")
print("Please specify either --file, --directory, or --bucket.")
Loading

0 comments on commit 682eac8

Please sign in to comment.