From b7ec1a407cf0590432aaf5f67613a0e581cc9bfe Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Tue, 7 Jan 2025 11:13:19 +0000 Subject: [PATCH 1/3] Make entry points uniform --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6747baf..58462bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dev = [ [project.scripts] aa-remove-data = "aa_remove_data.__main__:main" aa-print-header = "aa_remove_data.pb_utils:print_header" -PB_2_TXT = "aa_remove_data.pb_utils:pb_2_txt" +pb_2_txt = "aa_remove_data.pb_utils:pb_2_txt" [project.urls] GitHub = "https://github.com/DiamondLightSource/aa-remove-data" From d86c9f508b829e4636ada6e7707b13ec0fd71ae9 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Tue, 7 Jan 2025 12:06:50 +0000 Subject: [PATCH 2/3] Add 'write complete' message --- pyproject.toml | 2 +- src/aa_remove_data/pb_utils.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 58462bd..4b622c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dev = [ [project.scripts] aa-remove-data = "aa_remove_data.__main__:main" aa-print-header = "aa_remove_data.pb_utils:print_header" -pb_2_txt = "aa_remove_data.pb_utils:pb_2_txt" +pb-2-txt = "aa_remove_data.pb_utils:pb_2_txt" [project.urls] GitHub = "https://github.com/DiamondLightSource/aa-remove-data" diff --git a/src/aa_remove_data/pb_utils.py b/src/aa_remove_data/pb_utils.py index 0d19be6..3289111 100644 --- a/src/aa_remove_data/pb_utils.py +++ b/src/aa_remove_data/pb_utils.py @@ -178,6 +178,7 @@ def pb_2_txt(): raise ValueError(f"Invalid file extension: '{pb_file.suffix}'. Expected '.pb'.") pb = PBUtils(pb_file) pb.write_to_txt(txt_file) + print("Write completed!") def print_header(): From af80b136bf3a9925d146db6531559c4025aa3cb5 Mon Sep 17 00:00:00 2001 From: Jacob Williamson Date: Tue, 7 Jan 2025 16:21:59 +0000 Subject: [PATCH 3/3] Write chunked data to multiple files --- src/aa_remove_data/pb_utils.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/aa_remove_data/pb_utils.py b/src/aa_remove_data/pb_utils.py index 56a0816..e22b057 100644 --- a/src/aa_remove_data/pb_utils.py +++ b/src/aa_remove_data/pb_utils.py @@ -27,7 +27,7 @@ def __init__(self, filepath: PathLike | None = None, chunk_size=10000000): self.read_done = False self._chunk_size = chunk_size self._start_line = 0 - self._write_started = False + self._write_started = [] if filepath: self.read_pb(filepath) @@ -215,13 +215,15 @@ def write_pb(self, filepath: PathLike): self._replace_newline_chars(sample.SerializeToString()) + b"\n" for sample in self.samples ] - if self._write_started is False: # Write header, start new file + if ( + filepath not in self._write_started or self.chunked is False + ): # Write header, start new file header_b = ( self._replace_newline_chars(self.header.SerializeToString()) + b"\n" ) with open(filepath, "wb") as f: f.writelines([header_b] + samples_b) - self._write_started = True + self._write_started.append(filepath) else: # Add to existing file with open(filepath, "ab") as f: f.writelines(samples_b)