From 49d14db9641b54914bd6811e39d4195e1135393a Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Mon, 4 Sep 2023 11:05:22 +0200 Subject: [PATCH] SCC: allow starting parsing from the middle of a caption Skip until a caption style is specified --- .../python/ttconv/scc/caption_paragraph.py | 7 + src/main/python/ttconv/scc/context.py | 34 ++-- src/test/python/test_scc_reader.py | 175 ++++++++++++++++++ 3 files changed, 204 insertions(+), 12 deletions(-) diff --git a/src/main/python/ttconv/scc/caption_paragraph.py b/src/main/python/ttconv/scc/caption_paragraph.py index 9452296a..6b30eabd 100644 --- a/src/main/python/ttconv/scc/caption_paragraph.py +++ b/src/main/python/ttconv/scc/caption_paragraph.py @@ -187,6 +187,10 @@ def get_lines(self) -> Dict[int, SccCaptionLine]: """Returns the paragraph lines per row""" return self._caption_lines + def is_empty(self) -> bool: + """Returns whether the paragraph has no content""" + return not self._caption_lines + def copy_lines(self) -> Dict[int, SccCaptionLine]: """Copy paragraph lines (without time attributes)""" lines_copy = {} @@ -265,6 +269,9 @@ def guess_text_alignment(self) -> TextAlignType: def get_line_right_offset(line: SccCaptionLine) -> int: return SCC_ROOT_CELL_RESOLUTION_COLUMNS - (line.get_indent() + line.get_length()) + if self.is_empty(): + return TextAlignType.start + # look for longest line longest_line = max(self._caption_lines.values(), key=lambda line: line.get_length()) diff --git a/src/main/python/ttconv/scc/context.py b/src/main/python/ttconv/scc/context.py index ae1a756c..d2db5237 100644 --- a/src/main/python/ttconv/scc/context.py +++ b/src/main/python/ttconv/scc/context.py @@ -28,6 +28,7 @@ from __future__ import annotations import copy +import logging from typing import Optional, Type, Tuple from ttconv.model import Div @@ -44,6 +45,8 @@ ROLL_UP_BASE_ROW = 15 +LOGGER = logging.getLogger(__name__) + class SccContext: """SCC context for reader""" @@ -96,7 +99,7 @@ def new_buffered_caption(self): """Resets buffered caption""" self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) - def get_caption_to_process(self): + def get_caption_to_process(self) -> Optional[SccCaptionParagraph]: """Returns the caption currently being processed""" if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): # If the Paint-On or Roll-Up style is activated, write directly on active caption @@ -104,7 +107,9 @@ def get_caption_to_process(self): if self.current_style is SccCaptionStyle.PopOn: # For Pop-On style, write first on a buffered caption return self.buffered_caption - raise ValueError("SCC caption style not defined") + + LOGGER.warning("SCC caption style not defined") + return None def has_active_caption(self) -> bool: """Returns whether captions are being displayed or not""" @@ -147,7 +152,8 @@ def push_active_caption_to_model(self, time_code: SmpteTimeCode, clear_active_ca if clear_active_caption: self.active_caption = None - self.div.push_child(previous_caption.to_paragraph(self.div.get_doc())) + if not previous_caption.is_empty(): + self.div.push_child(previous_caption.to_paragraph(self.div.get_doc())) def backspace(self): """Move the cursors in a column to the left""" @@ -226,7 +232,7 @@ def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code: self.buffered_caption.set_cursor_at(pac_row, pac_indent) else: - raise ValueError("SCC caption style not defined") + LOGGER.warning("SCC caption style not defined") self.current_color = pac.get_color() self.current_font_style = pac.get_font_style() @@ -248,8 +254,9 @@ def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTime # In case of multiple mid-row codes, move right only after the first code # If there is already text on the current line - if processed_caption.get_current_text() is not None \ - and processed_caption.get_current_text().get_text() != "": + if processed_caption is not None \ + and processed_caption.get_current_text() is not None \ + and not processed_caption.get_current_text().is_empty(): # In case of paint-on replacing text if self.current_style is SccCaptionStyle.PaintOn \ @@ -264,7 +271,7 @@ def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTime processed_caption.append_text(" ") processed_caption.new_caption_text() - else: + elif processed_caption is not None: processed_caption.append_text(" ") self.current_color = color @@ -279,10 +286,12 @@ def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTime if text_decoration is not None: self.current_text_decoration = text_decoration - processed_caption.append_text(" ") - processed_caption.new_caption_text() + if processed_caption is not None: + processed_caption.append_text(" ") + processed_caption.new_caption_text() - if processed_caption.get_caption_style() is SccCaptionStyle.PaintOn: + if processed_caption is not None \ + and processed_caption.get_caption_style() is SccCaptionStyle.PaintOn: processed_caption.get_current_text().set_begin(time_code) def process_attribute_code(self, attribute_code: SccAttributeCode): @@ -291,7 +300,8 @@ def process_attribute_code(self, attribute_code: SccAttributeCode): processed_caption = self.get_caption_to_process() if processed_caption is None or processed_caption.get_current_text() is None: - raise ValueError("No current SCC caption nor content initialized") + LOGGER.warning("No current SCC caption nor content initialized") + return if processed_caption.get_current_text() is not None and processed_caption.get_current_text().get_text(): processed_caption.new_caption_text() @@ -460,7 +470,7 @@ def process_text(self, word: str, time_code: SmpteTimeCode): self.active_caption.get_current_text().add_style_property(StyleProperties.FontStyle, self.current_font_style) self.active_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration) - else: + elif self.current_style is SccCaptionStyle.PopOn: self.buffered_caption.append_text(word) self.buffered_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color) diff --git a/src/test/python/test_scc_reader.py b/src/test/python/test_scc_reader.py index f45c7a7e..74fa312a 100644 --- a/src/test/python/test_scc_reader.py +++ b/src/test/python/test_scc_reader.py @@ -1016,6 +1016,181 @@ def test_scc_mid_row_erase_displayed_memory_control_code(self): self.check_caption(p_list[5], "caption6", "00:00:03:19", None, "sagittis.") self.assertEqual(region_4, p_list[5].get_region()) + def test_scc_content_parsing_from_text(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:03:01 6970 7375 6d00 942c 942f + +00:00:07:29 9420 94D0 646f 6c6f 7220 7369 7420 616d 6574 2c80 9470 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e 942c 942f + +00:00:09:07 942c +""" + + scc_disassembly = """\ +00:00:03:01 ipsum{EDM}{EOC} +00:00:07:29 {RCL}{1400}dolor sit amet,{1500}consectetur adipiscing elit.{EDM}{EOC} +00:00:09:07 {EDM} +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_1, p_list[0].get_region()) + + def test_scc_content_parsing_from_mid_row_code(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:03:01 91ae 6970 7375 6d00 942c 942f + +00:00:07:29 9420 94D0 646f 6c6f 7220 7369 7420 616d 6574 2c80 9470 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e 942c 942f + +00:00:09:07 942c +""" + + scc_disassembly = """\ +00:00:03:01 {I}ipsum{EDM}{EOC} +00:00:07:29 {RCL}{1400}dolor sit amet,{1500}consectetur adipiscing elit.{EDM}{EOC} +00:00:09:07 {EDM} +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_1, p_list[0].get_region()) + + def test_scc_content_parsing_from_control_code(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:03:01 942c 6970 7375 6d00 942c 942f + +00:00:07:29 9420 94D0 646f 6c6f 7220 7369 7420 616d 6574 2c80 9470 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e 942c 942f + +00:00:09:07 942c +""" + + scc_disassembly = """\ +00:00:03:01 {EDM}ipsum{EDM}{EOC} +00:00:07:29 {RCL}{1400}dolor sit amet,{1500}consectetur adipiscing elit.{EDM}{EOC} +00:00:09:07 {EDM} +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_1, p_list[0].get_region()) + + def test_scc_content_parsing_from_preamble_address_code(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:03:01 9370 6970 7375 6d00 942c 942f + +00:00:07:29 9420 94D0 646f 6c6f 7220 7369 7420 616d 6574 2c80 9470 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e 942c 942f + +00:00:09:07 942c +""" + + scc_disassembly = """\ +00:00:03:01 {1300}ipsum{EDM}{EOC} +00:00:07:29 {RCL}{1400}dolor sit amet,{1500}consectetur adipiscing elit.{EDM}{EOC} +00:00:09:07 {EDM} +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_1, p_list[0].get_region()) if __name__ == '__main__': unittest.main()