diff --git a/.gitignore b/.gitignore index 7dc3f5b..93b4cf0 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ output/* .coverage coverage.* +myenv/ + dist/* */*.egg-info/* __pycache__ diff --git a/README.md b/README.md index 189a5d4..6ba0f15 100644 --- a/README.md +++ b/README.md @@ -15,30 +15,56 @@ pip install jp2_remediator==0.0.2 ## Usage -## Process one file -`python3 box_reader.py --file tests/test-images/7514499.jp2` +```bash +python3 src/jp2_remediator/main.py -h -`python3 box_reader.py --file tests/test-images/481014278.jp2` +usage: main.py [-h] {file,directory,bucket} ... -## Process directory -`python3 box_reader.py --directory tests/test-images/` +JP2 file processor -## Process Amazon S3 bucket -`python3 box_reader.py --bucket your-bucket-name --prefix optional-prefix` +options: + -h, --help show this help message and exit -## Process all .jp2 files in the bucket: -`python3 box_reader.py --bucket remediation-folder` +Input source: + {file,directory,bucket} + file Process a single JP2 file + directory Process all JP2 files in a directory + bucket Process all JP2 files in an S3 bucket +``` -## Process only files with a specific prefix (folder): -`python3 box_reader.py --bucket remediation-folder --prefix testbatch_20240923` +### Process one file +```bash +python3 src/jp2_remediator/main.py file tests/test-images/7514499.jp2 -`python3 box_reader.py --help` +python3 src/jp2_remediator/main.py file tests/test-images/481014278.jp2 +``` -## Run Tests -`python3 test_aws_connection.py` +### Process directory +```bash +python3 src/jp2_remediator/main.py directory tests/test-images/ +``` -### Run from src folder -`python3 -m unittest jp2_remediator.tests.test_box_reader` +### Process all .jp2 files in an S3 bucket: +```bash +python3 src/jp2_remediator/main.py bucket remediation-folder +``` + +### Process only files with a specific prefix (folder): +```bash +python3 src/jp2_remediator/main.py bucket remediation-folder --prefix testbatch_20240923 +``` + +## 
Run tests + +### Run integration tests +```bash +pytest src/jp2_remediator/tests/integration/ +``` + +### Run unit tests +```bash +pytest src/jp2_remediator/tests/unit/ +``` ## Docker environment @@ -51,3 +77,13 @@ Start Docker container ```bash ./bin/docker-run.sh ``` + +## Development environment +```bash +python3 -m venv myenv +source myenv/bin/activate +export PYTHONPATH="${PYTHONPATH}:src" +pip install -r requirements.txt + +python src/jp2_remediator/main.py -h +``` \ No newline at end of file diff --git a/src/jp2_remediator/box_reader.py b/src/jp2_remediator/box_reader.py index 63f252e..b0e2a29 100644 --- a/src/jp2_remediator/box_reader.py +++ b/src/jp2_remediator/box_reader.py @@ -1,14 +1,6 @@ -# import sys -import os -import argparse -import boto3 import datetime - -# from jpylyzer import jpylyzer from jpylyzer import boxvalidator -# from jpylyzer import byteconv - class BoxReader: def __init__(self, file_path): @@ -272,68 +264,3 @@ def read_jp2_file(self): self.write_modified_file(new_file_contents) - -def process_directory(directory_path): - """Process all JP2 files in a given directory.""" - for root, _, files in os.walk(directory_path): - for file in files: - if file.lower().endswith(".jp2"): - file_path = os.path.join(root, file) - print(f"Processing file: {file_path}") - reader = BoxReader(file_path) - reader.read_jp2_file() - - -def process_s3_bucket(bucket_name, prefix=""): - """Process all JP2 files in a given S3 bucket.""" - s3 = boto3.client("s3") - response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix) - - if "Contents" in response: - for obj in response["Contents"]: - if obj["Key"].lower().endswith(".jp2"): - file_path = obj["Key"] - print(f"""Processing file: {file_path} from bucket { - bucket_name - }""") - download_path = f"/tmp/{os.path.basename(file_path)}" - s3.download_file(bucket_name, file_path, download_path) - reader = BoxReader(download_path) - reader.read_jp2_file() - # Optionally, upload modified file back to S3 - 
import argparse
import datetime
import os
import tempfile


class Processor:
    """Run JP2 remediation on a single file, a directory tree, or an S3 bucket.

    Every entry point ends up calling ``BoxReader(path).read_jp2_file()``;
    the methods differ only in how they discover the paths to process.
    """

    @staticmethod
    def _remediate(file_path):
        """Run ``BoxReader`` over one local file.

        Args:
            file_path: Path of a JP2 file on the local filesystem.
        """
        # Imported here rather than at module level so that importing this
        # module (for example to print the CLI help) does not itself pull in
        # the reader's dependencies.
        from jp2_remediator.box_reader import BoxReader

        BoxReader(file_path).read_jp2_file()

    @staticmethod
    def _modified_name(path, timestamp):
        """Insert ``_modified_<timestamp>`` before the trailing extension.

        Only the final four characters are treated as the extension, so a
        ``.jp2`` occurring earlier in the path (such as in a folder name) is
        left untouched and the extension keeps its original letter case.

        Args:
            path: Local path or S3 key ending in ``.jp2`` (any letter case).
            timestamp: Text placed after ``_modified_``.

        Returns:
            The renamed path, e.g. ``a/x.JP2`` -> ``a/x_modified_<ts>.JP2``.
        """
        ext_len = len(".jp2")
        return f"{path[:-ext_len]}_modified_{timestamp}{path[-ext_len:]}"

    def process_file(self, file_path):
        """Process a single JP2 file.

        Args:
            file_path: Path of the JP2 file to process.
        """
        print(f"Processing file: {file_path}")
        self._remediate(file_path)

    def process_directory(self, directory_path):
        """Process all JP2 files in a given directory, recursively.

        Files are matched on a case-insensitive ``.jp2`` suffix; anything
        else found while walking the tree is ignored.

        Args:
            directory_path: Root directory to walk.
        """
        for root, _, files in os.walk(directory_path):
            for name in files:
                if name.lower().endswith(".jp2"):
                    # Same message and same work as the single-file path.
                    self.process_file(os.path.join(root, name))

    def process_s3_bucket(self, bucket_name, prefix=""):
        """Process all JP2 files in a given S3 bucket.

        Each matching object is downloaded into a scratch directory, run
        through ``BoxReader``, and the resulting modified file (if one was
        written) is uploaded next to the original under a
        ``_modified_<YYYYMMDD>`` key.

        Args:
            bucket_name: Name of the S3 bucket to scan.
            prefix: Only keys starting with this prefix are considered.
        """
        # boto3 is only needed for bucket mode, so import it on demand.
        import boto3

        s3 = boto3.client("s3")
        # A single list_objects_v2 call returns only one page of keys; the
        # paginator follows continuation tokens so larger buckets are not
        # silently truncated.
        pages = s3.get_paginator("list_objects_v2").paginate(
            Bucket=bucket_name, Prefix=prefix or ""
        )
        # A per-run scratch directory means a leftover "_modified_" file from
        # an earlier run can never be mistaken for this run's output, and
        # everything is cleaned up on exit.
        with tempfile.TemporaryDirectory(prefix="jp2_remediator_") as workdir:
            for page in pages:
                for obj in page.get("Contents", []):
                    key = obj["Key"]
                    if key.lower().endswith(".jp2"):
                        self._process_s3_object(s3, bucket_name, key, workdir)

    def _process_s3_object(self, s3, bucket_name, key, workdir):
        """Download, remediate and re-upload a single S3 object.

        Args:
            s3: boto3 S3 client.
            bucket_name: Bucket that holds ``key``.
            key: Object key ending in ``.jp2`` (any letter case).
            workdir: Local directory used for the downloaded copy.
        """
        print(f"Processing file: {key} from bucket {bucket_name}")

        # Store the local copy with a lower-case extension regardless of the
        # key's case, so the local naming below does not depend on it.
        stem = os.path.basename(key)[: -len(".jp2")]
        download_path = os.path.join(workdir, f"{stem}.jp2")
        s3.download_file(bucket_name, key, download_path)
        self._remediate(download_path)

        timestamp = datetime.datetime.now().strftime(
            "%Y%m%d"
        )  # use "%Y%m%d_%H%M%S" for more precision
        # NOTE(review): assumes BoxReader writes its output beside the input
        # as "<name>_modified_<YYYYMMDD>.jp2" - confirm against
        # BoxReader.write_modified_file.
        modified_path = self._modified_name(download_path, timestamp)
        if not os.path.isfile(modified_path):
            # Never fall back to uploading the untouched download: that would
            # overwrite or duplicate the original object.
            print(f"No modified file found for {key}; nothing uploaded")
            return

        s3.upload_file(
            modified_path, bucket_name, self._modified_name(key, timestamp)
        )


def main(argv=None):
    """Parse the command line and dispatch to the matching Processor method.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. The default
            of ``None`` keeps the normal command-line behaviour.
    """
    processor = Processor()

    parser = argparse.ArgumentParser(description="JP2 file processor")

    # One sub-command per input source; exactly one can be chosen per run.
    subparsers = parser.add_subparsers(
        title="Input source", dest="input_source"
    )

    # Sub-command: a single JP2 file
    file_parser = subparsers.add_parser(
        "file", help="Process a single JP2 file"
    )
    file_parser.add_argument(
        "file", help="Path to a single JP2 file to process"
    )
    file_parser.set_defaults(
        func=lambda args: processor.process_file(args.file)
    )

    # Sub-command: every JP2 file below a directory
    directory_parser = subparsers.add_parser(
        "directory", help="Process all JP2 files in a directory"
    )
    directory_parser.add_argument(
        "directory", help="Path to a directory of JP2 files to process"
    )
    directory_parser.set_defaults(
        func=lambda args: processor.process_directory(args.directory)
    )

    # Sub-command: every JP2 object in an S3 bucket
    bucket_parser = subparsers.add_parser(
        "bucket", help="Process all JP2 files in an S3 bucket"
    )
    bucket_parser.add_argument(
        "bucket", help="Name of the AWS S3 bucket to process JP2 files from"
    )
    bucket_parser.add_argument(
        "--prefix",
        help="Prefix of files in the AWS S3 bucket (optional)",
        default="",
    )
    bucket_parser.set_defaults(
        func=lambda args: processor.process_s3_bucket(
            args.bucket, args.prefix
        )
    )

    args = parser.parse_args(argv)

    # The sub-command is optional, so "func" is only set when one was given.
    if hasattr(args, "func"):
        args.func(args)
    else:
        parser.print_help()


if __name__ == "__main__":
    main()