Skip to content

Commit

Permalink
Refactor 'main.py'
Browse files Browse the repository at this point in the history
  • Loading branch information
awoods committed Oct 27, 2024
1 parent 960dfe7 commit e5ce029
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 117 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ output/*
.coverage
coverage.*

myenv/

dist/*
*/*.egg-info/*
__pycache__
Expand Down
68 changes: 52 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,30 +15,56 @@ pip install jp2_remediator==0.0.2

## Usage

## Process one file
`python3 box_reader.py --file tests/test-images/7514499.jp2`
```bash
python3 src/jp2_remediator/main.py -h

`python3 box_reader.py --file tests/test-images/481014278.jp2`
usage: main.py [-h] {file,directory,bucket} ...

## Process directory
`python3 box_reader.py --directory tests/test-images/`
JP2 file processor

## Process Amazon S3 bucket
`python3 box_reader.py --bucket your-bucket-name --prefix optional-prefix`
options:
-h, --help show this help message and exit

## Process all .jp2 files in the bucket:
`python3 box_reader.py --bucket remediation-folder`
Input source:
{file,directory,bucket}
file Process a single JP2 file
directory Process all JP2 files in a directory
bucket Process all JP2 files in an S3 bucket
```

## Process only files with a specific prefix (folder):
`python3 box_reader.py --bucket remediation-folder --prefix testbatch_20240923`
### Process one file
```bash
python3 src/jp2_remediator/main.py file tests/test-images/7514499.jp2

`python3 box_reader.py --help`
python3 src/jp2_remediator/main.py file tests/test-images/481014278.jp2
```

## Run Tests
`python3 test_aws_connection.py`
### Process directory
```bash
python3 src/jp2_remediator/main.py directory tests/test-images/
```

### Run from src folder
`python3 -m unittest jp2_remediator.tests.test_box_reader`
### Process all .jp2 files in an S3 bucket:
```bash
python3 src/jp2_remediator/main.py bucket remediation-folder
```

### Process only files with a specific prefix (folder):
```bash
python3 src/jp2_remediator/main.py bucket remediation-folder --prefix testbatch_20240923
```

## Run tests

### Run integration tests
```bash
pytest src/jp2_remediator/tests/integration/
```

### Run unit tests
```bash
pytest src/jp2_remediator/tests/unit/
```

## Docker environment

Expand All @@ -51,3 +77,13 @@ Start Docker container
```bash
./bin/docker-run.sh
```

## Development environment
```bash
python3 -m venv myenv
source myenv/bin/activate
export PYTHONPATH="${PYTHONPATH}:src"
pip install -r requirements.txt
python src/jp2_remediator/main.py -h
```
73 changes: 0 additions & 73 deletions src/jp2_remediator/box_reader.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
# import sys
import os
import argparse
import boto3
import datetime

# from jpylyzer import jpylyzer
from jpylyzer import boxvalidator

# from jpylyzer import byteconv


class BoxReader:
def __init__(self, file_path):
Expand Down Expand Up @@ -272,68 +264,3 @@ def read_jp2_file(self):

self.write_modified_file(new_file_contents)


def process_directory(directory_path):
    """Walk ``directory_path`` recursively and process every JP2 file found.

    A file counts as JP2 when its name ends with ``.jp2`` (compared
    case-insensitively). Each match is announced on stdout and then handed
    to ``BoxReader.read_jp2_file``.
    """
    for current_dir, _subdirs, names in os.walk(directory_path):
        # Lazily select the JP2 candidates so directory order is preserved.
        jp2_names = (name for name in names if name.lower().endswith(".jp2"))
        for name in jp2_names:
            file_path = os.path.join(current_dir, name)
            print(f"Processing file: {file_path}")
            BoxReader(file_path).read_jp2_file()


def process_s3_bucket(bucket_name, prefix=""):
    """Process all JP2 files in a given S3 bucket.

    Every object whose key ends with ``.jp2`` (case-insensitive) under
    ``prefix`` is downloaded to ``/tmp``, run through
    ``BoxReader.read_jp2_file`` and the resulting ``_modified_<date>.jp2``
    file is uploaded back next to the original key.

    Args:
        bucket_name: Name of the S3 bucket to scan.
        prefix: Optional key prefix limiting the scan; ``None`` is treated
            as "no prefix".
    """
    s3 = boto3.client("s3")

    # A single list_objects_v2 call returns at most 1000 keys, so use the
    # paginator to see the whole bucket. boto3 also rejects Prefix=None,
    # hence the fallback to an empty string.
    paginator = s3.get_paginator("list_objects_v2")
    pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix or "")

    # Snapshot the keys before processing: the uploads below also end in
    # ".jp2" and must not be picked up by a later page of the same listing.
    jp2_keys = []
    for page in pages:
        for obj in page.get("Contents", []):
            if obj["Key"].lower().endswith(".jp2"):
                jp2_keys.append(obj["Key"])

    for file_path in jp2_keys:
        print(f"Processing file: {file_path} from bucket {bucket_name}")
        download_path = f"/tmp/{os.path.basename(file_path)}"
        s3.download_file(bucket_name, file_path, download_path)
        reader = BoxReader(download_path)
        reader.read_jp2_file()

        # Upload the modified file back to S3.
        # NOTE(review): presumes BoxReader writes its output to
        # "<name>_modified_<YYYYMMDD>.jp2" beside the download - confirm.
        timestamp = datetime.datetime.now().strftime(
            "%Y%m%d"
        )  # use "%Y%m%d_%H%M%S" for more precision
        modified_suffix = f"_modified_{timestamp}.jp2"
        s3.upload_file(
            download_path.replace(".jp2", modified_suffix),
            bucket_name,
            file_path.replace(".jp2", modified_suffix),
        )


if __name__ == "__main__":
    # Command-line entry point: pick exactly one input source and process it.
    parser = argparse.ArgumentParser(description="JP2 file processor")
    parser.add_argument("--file", help="Path to a single JP2 file to process.")
    parser.add_argument(
        "--directory", help="Path to a directory of JP2 files to process."
    )
    parser.add_argument(
        "--bucket", help="Name of the AWS S3 bucket to process JP2 files from."
    )
    parser.add_argument(
        "--prefix",
        # Default to "" rather than None: the value is forwarded to boto3 as
        # the listing Prefix, and boto3 rejects None for that parameter.
        default="",
        help="Prefix of files in the AWS S3 bucket (optional).",
    )

    args = parser.parse_args()

    # The first source given wins, in the order file, directory, bucket.
    if args.file:
        reader = BoxReader(args.file)
        reader.read_jp2_file()
    elif args.directory:
        process_directory(args.directory)
    elif args.bucket:
        process_s3_bucket(args.bucket, args.prefix)
    else:
        print("Please specify either --file, --directory, or --bucket.")
84 changes: 56 additions & 28 deletions src/jp2_remediator/main.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,62 @@
import sys
import os
import argparse
from jp2_remediator.processor import Processor


def main():
    """Validate the two folder paths given on the command line and echo them.

    Exits with status 1 when the argument count is wrong or when either
    path is not an existing directory.
    """
    if len(sys.argv) != 3:
        print("Usage: python script.py <folder_path1> <folder_path2>")
        sys.exit(1)

    _script, folder_path1, folder_path2 = sys.argv

    # Check the folders in order so the first invalid one is reported.
    for candidate in (folder_path1, folder_path2):
        if not os.path.isdir(candidate):
            print(f"Error: {candidate} is not a valid directory.")
            sys.exit(1)

    print(f"Folder 1: {folder_path1}")
    print(f"Folder 2: {folder_path2}")
def main():
    """Parse the command line and dispatch to the matching Processor method.

    Three sub-commands are offered - ``file``, ``directory`` and ``bucket``.
    Each one stores a ``func`` callback on the parsed namespace; when no
    sub-command is given the help text is printed instead.
    """
    processor = Processor()

    parser = argparse.ArgumentParser(description="JP2 file processor")

    # One sub-command per input source; only one can be chosen per run.
    subparsers = parser.add_subparsers(
        title="Input source", dest="input_source"
    )

    # file: a single JP2 file
    file_cmd = subparsers.add_parser("file", help="Process a single JP2 file")
    file_cmd.add_argument("file", help="Path to a single JP2 file to process")
    file_cmd.set_defaults(func=lambda ns: processor.process_file(ns.file))

    # directory: every JP2 file below a directory
    directory_cmd = subparsers.add_parser(
        "directory", help="Process all JP2 files in a directory"
    )
    directory_cmd.add_argument(
        "directory", help="Path to a directory of JP2 files to process"
    )
    directory_cmd.set_defaults(
        func=lambda ns: processor.process_directory(ns.directory)
    )

    # bucket: every JP2 file in an S3 bucket, optionally below a prefix
    bucket_cmd = subparsers.add_parser(
        "bucket", help="Process all JP2 files in an S3 bucket"
    )
    bucket_cmd.add_argument(
        "bucket", help="Name of the AWS S3 bucket to process JP2 files from"
    )
    bucket_cmd.add_argument(
        "--prefix",
        help="Prefix of files in the AWS S3 bucket (optional)",
        default="",
    )
    bucket_cmd.set_defaults(
        func=lambda ns: processor.process_s3_bucket(ns.bucket, ns.prefix)
    )

    args = parser.parse_args()

    # "func" is only present when a sub-command was selected.
    handler = getattr(args, "func", None)
    if handler is None:
        parser.print_help()
        return
    handler(args)


if __name__ == "__main__":
main()


def hello_world():
    """Print a fixed greeting to stdout."""
    greeting = "Hello, world!"
    print(greeting)


def add_one(number):
    """Return ``number`` increased by one."""
    incremented = number + 1
    return incremented
main()
57 changes: 57 additions & 0 deletions src/jp2_remediator/processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import datetime
import os
import boto3

from jp2_remediator.box_reader import BoxReader


class Processor:
    """Class to process JP2 files from a path, a directory or an S3 bucket."""

    def process_file(self, file_path):
        """Process a single JP2 file.

        Args:
            file_path: Path of the JP2 file handed to ``BoxReader``.
        """
        print(f"Processing file: {file_path}")
        reader = BoxReader(file_path)
        reader.read_jp2_file()

    def process_directory(self, directory_path):
        """Process all JP2 files in a given directory, recursively.

        Args:
            directory_path: Root directory to walk. Files whose name ends
                with ``.jp2`` (case-insensitive) are processed.
        """
        for root, _, files in os.walk(directory_path):
            for file in files:
                if file.lower().endswith(".jp2"):
                    # Delegate so single-file and directory runs share the
                    # same announce-then-read behaviour.
                    self.process_file(os.path.join(root, file))

    def process_s3_bucket(self, bucket_name, prefix=""):
        """Process all JP2 files in a given S3 bucket.

        Every object whose key ends with ``.jp2`` (case-insensitive) under
        ``prefix`` is downloaded to ``/tmp``, run through
        ``BoxReader.read_jp2_file`` and the resulting
        ``_modified_<date>.jp2`` file is uploaded back next to the
        original key.

        Args:
            bucket_name: Name of the S3 bucket to scan.
            prefix: Optional key prefix limiting the scan; ``None`` is
                treated as "no prefix".
        """
        s3 = boto3.client("s3")

        # A single list_objects_v2 call returns at most 1000 keys, so use
        # the paginator to see the whole bucket. boto3 also rejects
        # Prefix=None, hence the fallback to an empty string.
        paginator = s3.get_paginator("list_objects_v2")
        pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix or "")

        # Snapshot the keys before processing: the uploads below also end
        # in ".jp2" and must not be picked up by a later page.
        jp2_keys = []
        for page in pages:
            for obj in page.get("Contents", []):
                if obj["Key"].lower().endswith(".jp2"):
                    jp2_keys.append(obj["Key"])

        for file_path in jp2_keys:
            print(f"Processing file: {file_path} from bucket {bucket_name}")
            download_path = f"/tmp/{os.path.basename(file_path)}"
            s3.download_file(bucket_name, file_path, download_path)
            reader = BoxReader(download_path)
            reader.read_jp2_file()

            # Upload the modified file back to S3.
            # NOTE(review): presumes BoxReader writes its output to
            # "<name>_modified_<YYYYMMDD>.jp2" beside the download - confirm.
            timestamp = datetime.datetime.now().strftime(
                "%Y%m%d"
            )  # use "%Y%m%d_%H%M%S" for more precision
            modified_suffix = f"_modified_{timestamp}.jp2"
            s3.upload_file(
                download_path.replace(".jp2", modified_suffix),
                bucket_name,
                file_path.replace(".jp2", modified_suffix),
            )


0 comments on commit e5ce029

Please sign in to comment.