SCC: make reader more resilient to malformed SCC content

Paragraphs have PopOn style and empty text line by default
sandflow · Oct 12, 2023 · e749eb9 · e749eb9
1 parent d72d2f7
commit e749eb9
Show file tree

Hide file tree

Showing 9 changed files with 312 additions and 146 deletions.
diff --git a/src/main/python/ttconv/scc/caption_line.py b/src/main/python/ttconv/scc/caption_line.py
@@ -28,7 +28,7 @@
 from __future__ import annotations
 
 import logging
-from typing import Optional, List, Union
+from typing import List, Union
 
 from ttconv.scc.caption_text import SccCaptionText
 
@@ -38,13 +38,18 @@
 class SccCaptionLine:
   """Caption paragraph line"""
 
+  @staticmethod
+  def default():
+    """Initializes a default caption paragraph line"""
+    return SccCaptionLine(0, 0)
+
   def __init__(self, row: int, indent: int):
-    self._texts: List[SccCaptionText] = []
     self._row: int = row  # Row in the active area
     self._indent: int = indent  # Indentation in the active area
 
     self._cursor: int = 0  # Position of the cursor on the line
-    self._current_text: Optional[SccCaptionText] = None  # Text content where the cursor is
+    self._current_text: SccCaptionText = SccCaptionText()  # Text content where the cursor is
+    self._texts: List[SccCaptionText] = [self._current_text]
 
   def add_text(self, text: Union[SccCaptionText, str]):
     """Add text to line"""
@@ -55,31 +60,23 @@ def add_text(self, text: Union[SccCaptionText, str]):
       self._cursor = self.get_length()
 
     elif isinstance(text, str):
+      remaining_text = text
 
-      if self._current_text is None:
-        # Initialize a new text element if necessary
-        self._texts.append(SccCaptionText())
-        self._current_text = self._texts[-1]
-        self._append_text(text)
-
-      else:
-        remaining_text = text
-
-        # While the cursor is not on the last text element, and some text remains
-        while self._current_text is not self._texts[-1] and len(remaining_text) > 0:
-          available = self._current_text.get_length() - self._current_text.get_cursor()
-          text_to_write = remaining_text[:available]
+      # While the cursor is not on the last text element, and some text remains
+      while self._current_text is not self._texts[-1] and len(remaining_text) > 0:
+        available = self._current_text.get_length() - self._current_text.get_cursor()
+        text_to_write = remaining_text[:available]
 
-          # Replace current text element content
-          self._append_text(text_to_write)
-          remaining_text = remaining_text[available:]
+        # Replace current text element content
+        self._append_text(text_to_write)
+        remaining_text = remaining_text[available:]
 
-        # If some text remains on the last text element
-        if len(remaining_text) > 0:
-          assert self._current_text is self._texts[-1]
+      # If some text remains on the last text element
+      if len(remaining_text) > 0:
+        assert self._current_text is self._texts[-1]
 
-          # Replace and append to current text element content
-          self._append_text(remaining_text)
+        # Replace and append to current text element content
+        self._append_text(remaining_text)
 
     else:
       raise ValueError("Unsupported text type for SCC caption line")
@@ -96,7 +93,7 @@ def indent(self, indent: int):
     """Indent current line"""
     self._indent += indent
 
-  def get_current_text(self) -> Optional[SccCaptionText]:
+  def get_current_text(self) -> SccCaptionText:
     """Returns current text content"""
     return self._current_text
 
@@ -145,13 +142,13 @@ def get_indent(self) -> int:
   def clear(self):
     """Clears the line text contents"""
     self._texts.clear()
-    self._current_text = None
+    self._current_text = SccCaptionText()
+    self._texts = [self._current_text]
     self.set_cursor(0)
 
   def is_empty(self) -> bool:
     """Returns whether the line text is empty or not"""
-    # no caption texts or an empty text
-    return len(self._texts) == 0 or (len(self._texts) == 1 and self._texts[-1].get_text() == "")
+    return self.get_length() == 0
 
   def get_leading_spaces(self) -> int:
     """Returns the number of leading space characters of the line"""

diff --git a/src/main/python/ttconv/scc/caption_paragraph.py b/src/main/python/ttconv/scc/caption_paragraph.py
@@ -52,6 +52,11 @@
 class SccCaptionParagraph:
   """Caption paragraph"""
 
+  @staticmethod
+  def default(caption_style: SccCaptionStyle = SccCaptionStyle.Unknown):
+    """Initializes a default caption paragraph"""
+    return SccCaptionParagraph(caption_style=caption_style)
+
   def __init__(self, safe_area_x_offset: int = 0, safe_area_y_offset: int = 0,
                caption_style: SccCaptionStyle = SccCaptionStyle.Unknown):
     self._caption_id: str = ""
@@ -70,6 +75,8 @@ def __init__(self, safe_area_x_offset: int = 0, safe_area_y_offset: int = 0,
     self._current_line: Optional[SccCaptionLine] = None
     # Lines per row in the active area (will be separated by line-breaks)
     self._caption_lines: Dict[int, SccCaptionLine] = {}
+    # Initialize first default line
+    self.new_caption_line()
 
     self._caption_style: SccCaptionStyle = caption_style
     self._style_properties = {}
@@ -114,14 +121,12 @@ def get_caption_style(self) -> SccCaptionStyle:
     """Returns the caption style"""
     return self._caption_style
 
-  def get_current_line(self) -> Optional[SccCaptionLine]:
+  def get_current_line(self) -> SccCaptionLine:
     """Returns the current caption line"""
     return self._current_line
 
-  def get_current_text(self) -> Optional[SccCaptionText]:
+  def get_current_text(self) -> SccCaptionText:
     """Returns the current caption text"""
-    if self._current_line is None:
-      return None
     return self._current_line.get_current_text()
 
   def append_text(self, text: str):
@@ -155,9 +160,14 @@ def get_style_property(self, style_property) -> Optional:
   def set_cursor_at(self, row: int, indent: Optional[int] = None):
     """Set cursor position and initialize a new line if necessary"""
 
-    # Remove current line if empty (useless)
-    if self._current_line is not None and self._current_line.is_empty():
-      del self._caption_lines[self._current_line.get_row()]
+    if self._caption_lines.get(self._current_line.get_row()) is not None:
+      # Set current line if necessary
+      if self._caption_lines.get(self._current_line.get_row()) is not self._current_line:
+        self._current_line = self._caption_lines.get(self._current_line.get_row())
+
+      # Remove current line if empty (i.e. useless)
+      if self._current_line.is_empty():
+        del self._caption_lines[self._current_line.get_row()]
 
     self._cursor = (row, indent if indent is not None else 0)
 
@@ -198,7 +208,11 @@ def get_lines(self) -> Dict[int, SccCaptionLine]:
 
   def is_empty(self) -> bool:
     """Returns whether the paragraph has no content"""
-    return not self._caption_lines
+    return self._get_length() == 0
+
+  def _get_length(self) -> int:
+    """Returns the total length of contained text"""
+    return sum([line.get_length() for line in self._caption_lines.values()])
 
   def copy_lines(self) -> Dict[int, SccCaptionLine]:
     """Copy paragraph lines (without time attributes)"""
@@ -217,10 +231,6 @@ def copy_lines(self) -> Dict[int, SccCaptionLine]:
 
   def new_caption_text(self):
     """Appends a new caption text content, and keeps reference on it"""
-    if self._current_line is None:
-      LOGGER.warning("Add a new caption line to add new caption text")
-      self.new_caption_line()
-
     self._current_line.add_text(SccCaptionText())
 
   def new_caption_line(self):
@@ -245,7 +255,7 @@ def roll_up(self):
 
   def get_origin(self) -> CoordinateType:
     """Computes and returns the current paragraph origin, based on its content"""
-    if len(self._caption_lines) > 0:
+    if not self.is_empty():
       x_offsets = [text.get_indent() for text in self._caption_lines.values()]
       y_offsets = [text.get_row() - 1 for text in self._caption_lines.values()]
 
@@ -255,7 +265,7 @@ def get_origin(self) -> CoordinateType:
 
   def get_extent(self) -> ExtentType:
     """Computes and returns the current paragraph extent, based on its content"""
-    if len(self._caption_lines) == 0:
+    if self.is_empty():
       return get_extent_from_dimensions(0, 0)
 
     paragraph_rows = self._caption_lines.keys()

diff --git a/src/main/python/ttconv/scc/caption_style.py b/src/main/python/ttconv/scc/caption_style.py
@@ -58,3 +58,8 @@ class SccCaptionStyle(Enum):
   #  - EDM (to erase the displayed caption, optional)
   #  - EOC (to display the current caption)
   PopOn = 3
+
+  @staticmethod
+  def default():
+    """Returns the default caption style"""
+    return SccCaptionStyle.PopOn
diff --git a/src/main/python/ttconv/scc/context.py b/src/main/python/ttconv/scc/context.py
@@ -66,12 +66,12 @@ def __init__(self, safe_area_x_offset: int, safe_area_y_offset: int, config: Opt
     self.previous_word: Optional[SccWord] = None
     self.previous_word_type: Optional[Type] = None
 
+    # Caption style (Pop-on, Roll-up, Paint-on) currently processed
+    self.current_style = SccCaptionStyle.default()
     # Buffered caption being built
     self.buffered_caption = None
     # Captions being displayed
     self.active_caption: Optional[SccCaptionParagraph] = None
-    # Caption style (Pop-on, Roll-up, Paint-on) currently processed
-    self.current_style = SccCaptionStyle.Unknown
 
     # Roll-up caption number of lines
     self.roll_up_depth: int = 0
@@ -97,7 +97,7 @@ def new_active_caption(self, begin_time_code: SmpteTimeCode, caption_style: SccC
 
   def new_buffered_caption(self):
     """Resets buffered caption"""
-    self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset)
+    self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.PopOn)
 
   def get_caption_to_process(self) -> Optional[SccCaptionParagraph]:
     """Returns the caption currently being processed"""
@@ -204,9 +204,6 @@ def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code:
 
       self.active_caption.set_cursor_at(pac_row, pac_indent)
 
-      if self.active_caption.get_current_text() is None:
-        self.active_caption.new_caption_text()
-
     elif self.current_style is SccCaptionStyle.RollUp:
 
       if not self.has_active_caption():
@@ -299,11 +296,11 @@ def process_attribute_code(self, attribute_code: SccAttributeCode):
 
     processed_caption = self.get_caption_to_process()
 
-    if processed_caption is None or processed_caption.get_current_text() is None:
+    if processed_caption is None:
       LOGGER.warning("No current SCC caption nor content initialized")
       return
 
-    if processed_caption.get_current_text() is not None and processed_caption.get_current_text().get_text():
+    if processed_caption.get_current_text().get_text():
       processed_caption.new_caption_text()
 
     if attribute_code.is_background():
@@ -321,11 +318,6 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim
       # Start a new Pop-On caption
       self.current_style = SccCaptionStyle.PopOn
 
-      if self.buffered_caption.get_caption_style() is SccCaptionStyle.Unknown:
-        self.buffered_caption.set_caption_style(self.current_style)
-        self.buffered_caption.new_caption_line()
-        self.buffered_caption.new_caption_text()
-
     elif control_code is SccControlCode.RDC:
       # Start a new Paint-On caption
       self.current_style = SccCaptionStyle.PaintOn
@@ -432,6 +424,10 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim
   def process_text(self, word: str, time_code: SmpteTimeCode):
     """Processes SCC text words"""
     if self.current_style is SccCaptionStyle.PaintOn:
+      if not self.has_active_caption():
+        LOGGER.warning("Initialize active caption buffer to handle paint-on text at %s", time_code)
+        self.paint_on_active_caption(time_code)
+
       if word.startswith(" "):
 
         if self.active_caption.get_caption_style() is not SccCaptionStyle.PaintOn:
@@ -464,6 +460,10 @@ def process_text(self, word: str, time_code: SmpteTimeCode):
       self.active_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration)
 
     elif self.current_style is SccCaptionStyle.RollUp:
+      if not self.has_active_caption():
+        LOGGER.warning("Initialize active caption buffer to handle roll-up text at %s", time_code)
+        self.new_active_caption(time_code, self.current_style)
+
       self.active_caption.append_text(word)
 
       self.active_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color)

diff --git a/src/main/python/ttconv/scc/disassembly.py b/src/main/python/ttconv/scc/disassembly.py
@@ -26,10 +26,18 @@
 """SCC disassembly functions"""
 import logging
 
+from ttconv.scc.codes.attribute_codes import SccAttributeCode
+from ttconv.scc.codes.control_codes import SccControlCode
+from ttconv.scc.codes.extended_characters import SccExtendedCharacter
+from ttconv.scc.codes.mid_row_codes import SccMidRowCode
+from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode
+from ttconv.scc.codes.special_characters import SccSpecialCharacter
+from ttconv.scc.word import SccWord
 from ttconv.style_properties import ColorType, NamedColors, FontStyleType, TextDecorationType
 
 LOGGER = logging.getLogger(__name__)
 
+
 def get_color_disassembly(color: ColorType) -> str:
   """Get color disassembly code"""
   disassembly = ""
@@ -81,3 +89,63 @@ def get_text_decoration_disassembly(text_decoration: TextDecorationType) -> str:
   if text_decoration is not None and text_decoration.underline is True:
     return "U"
   return ""
+
+
+def get_scc_word_disassembly(scc_word: SccWord) -> str:
+  """Returns the disassembly code for specified SCC word"""
+  if scc_word.value == 0x0000:
+    return "{}"
+
+  if scc_word.byte_1 < 0x20:
+
+    attribute_code = SccAttributeCode.find(scc_word.value)
+    control_code = SccControlCode.find(scc_word.value)
+    mid_row_code = SccMidRowCode.find(scc_word.value)
+    pac = SccPreambleAddressCode.find(scc_word.byte_1, scc_word.byte_2)
+    spec_char = SccSpecialCharacter.find(scc_word.value)
+    extended_char = SccExtendedCharacter.find(scc_word.value)
+
+    if pac is not None:
+      disassembly_code = f"{{{pac.get_row():02}"
+      color = pac.get_color()
+      indent = pac.get_indent()
+      if indent is not None and indent > 0:
+        disassembly_code += f"{indent :02}"
+      elif color is not None:
+        disassembly_code += get_color_disassembly(color)
+        disassembly_code += get_font_style_disassembly(pac.get_font_style())
+        disassembly_code += get_text_decoration_disassembly(pac.get_text_decoration())
+      else:
+        disassembly_code += "00"
+      disassembly_code += "}"
+      return disassembly_code
+
+    if attribute_code is not None:
+      disassembly_code = "{"
+      disassembly_code += "B" if attribute_code.is_background() else ""
+      disassembly_code += get_color_disassembly(attribute_code.get_color())
+      disassembly_code += get_text_decoration_disassembly(attribute_code.get_text_decoration())
+      disassembly_code += "}"
+      return disassembly_code
+
+    if mid_row_code is not None:
+      disassembly_code = "{"
+      disassembly_code += get_color_disassembly(mid_row_code.get_color())
+      disassembly_code += get_font_style_disassembly(mid_row_code.get_font_style())
+      disassembly_code += get_text_decoration_disassembly(mid_row_code.get_text_decoration())
+      disassembly_code += "}"
+      return disassembly_code
+
+    if control_code is not None:
+      return "{" + control_code.get_name() + "}"
+
+    if spec_char is not None:
+      return spec_char.get_unicode_value()
+
+    if extended_char is not None:
+      return extended_char.get_unicode_value()
+
+    LOGGER.warning("Unsupported SCC word: %s", hex(scc_word.value))
+    return "{??}"
+
+  return scc_word.to_text()