Skip to content

Commit

Permalink
SCC: make reader more resilient to malformed SCC content
Browse files: browse the repository at this point in the history
Paragraphs have PopOn style and empty text line by default
  • Loading branch information
valnoel committed Oct 12, 2023
1 parent d72d2f7 commit e749eb9
Show file tree
Hide file tree
Showing 9 changed files with 312 additions and 146 deletions.
53 changes: 25 additions & 28 deletions src/main/python/ttconv/scc/caption_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from __future__ import annotations

import logging
from typing import Optional, List, Union
from typing import List, Union

from ttconv.scc.caption_text import SccCaptionText

Expand All @@ -38,13 +38,18 @@
class SccCaptionLine:
"""Caption paragraph line"""

@staticmethod
def default() -> SccCaptionLine:
    """Creates a caption line placed at row 0 with an indent of 0"""
    return SccCaptionLine(row=0, indent=0)

def __init__(self, row: int, indent: int):
self._texts: List[SccCaptionText] = []
self._row: int = row # Row in the active area
self._indent: int = indent # Indentation in the active area

self._cursor: int = 0 # Position of the cursor on the line
self._current_text: Optional[SccCaptionText] = None # Text content where the cursor is
self._current_text: SccCaptionText = SccCaptionText() # Text content where the cursor is
self._texts: List[SccCaptionText] = [self._current_text]

def add_text(self, text: Union[SccCaptionText, str]):
"""Add text to line"""
Expand All @@ -55,31 +60,23 @@ def add_text(self, text: Union[SccCaptionText, str]):
self._cursor = self.get_length()

elif isinstance(text, str):
remaining_text = text

if self._current_text is None:
# Initialize a new text element if necessary
self._texts.append(SccCaptionText())
self._current_text = self._texts[-1]
self._append_text(text)

else:
remaining_text = text

# While the cursor is not on the last text element, and some text remains
while self._current_text is not self._texts[-1] and len(remaining_text) > 0:
available = self._current_text.get_length() - self._current_text.get_cursor()
text_to_write = remaining_text[:available]
# While the cursor is not on the last text element, and some text remains
while self._current_text is not self._texts[-1] and len(remaining_text) > 0:
available = self._current_text.get_length() - self._current_text.get_cursor()
text_to_write = remaining_text[:available]

# Replace current text element content
self._append_text(text_to_write)
remaining_text = remaining_text[available:]
# Replace current text element content
self._append_text(text_to_write)
remaining_text = remaining_text[available:]

# If some text remains on the last text element
if len(remaining_text) > 0:
assert self._current_text is self._texts[-1]
# If some text remains on the last text element
if len(remaining_text) > 0:
assert self._current_text is self._texts[-1]

# Replace and append to current text element content
self._append_text(remaining_text)
# Replace and append to current text element content
self._append_text(remaining_text)

else:
raise ValueError("Unsupported text type for SCC caption line")
Expand All @@ -96,7 +93,7 @@ def indent(self, indent: int):
"""Indent current line"""
self._indent += indent

def get_current_text(self) -> Optional[SccCaptionText]:
def get_current_text(self) -> SccCaptionText:
"""Returns current text content"""
return self._current_text

Expand Down Expand Up @@ -145,13 +142,13 @@ def get_indent(self) -> int:
def clear(self):
"""Clears the line text contents"""
self._texts.clear()
self._current_text = None
self._current_text = SccCaptionText()
self._texts = [self._current_text]
self.set_cursor(0)

def is_empty(self) -> bool:
"""Returns whether the line text is empty or not"""
# no caption texts or an empty text
return len(self._texts) == 0 or (len(self._texts) == 1 and self._texts[-1].get_text() == "")
return self.get_length() == 0

def get_leading_spaces(self) -> int:
"""Returns the number of leading space characters of the line"""
Expand Down
38 changes: 24 additions & 14 deletions src/main/python/ttconv/scc/caption_paragraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@
class SccCaptionParagraph:
"""Caption paragraph"""

@staticmethod
def default(caption_style: SccCaptionStyle = SccCaptionStyle.Unknown) -> "SccCaptionParagraph":
    """Creates a caption paragraph with the given style and default safe area offsets"""
    paragraph = SccCaptionParagraph(caption_style=caption_style)
    return paragraph

def __init__(self, safe_area_x_offset: int = 0, safe_area_y_offset: int = 0,
caption_style: SccCaptionStyle = SccCaptionStyle.Unknown):
self._caption_id: str = ""
Expand All @@ -70,6 +75,8 @@ def __init__(self, safe_area_x_offset: int = 0, safe_area_y_offset: int = 0,
self._current_line: Optional[SccCaptionLine] = None
# Lines per row in the active area (will be separated by line-breaks)
self._caption_lines: Dict[int, SccCaptionLine] = {}
# Initialize first default line
self.new_caption_line()

self._caption_style: SccCaptionStyle = caption_style
self._style_properties = {}
Expand Down Expand Up @@ -114,14 +121,12 @@ def get_caption_style(self) -> SccCaptionStyle:
"""Returns the caption style"""
return self._caption_style

def get_current_line(self) -> Optional[SccCaptionLine]:
def get_current_line(self) -> SccCaptionLine:
"""Returns the current caption line"""
return self._current_line

def get_current_text(self) -> Optional[SccCaptionText]:
def get_current_text(self) -> SccCaptionText:
"""Returns the current caption text"""
if self._current_line is None:
return None
return self._current_line.get_current_text()

def append_text(self, text: str):
Expand Down Expand Up @@ -155,9 +160,14 @@ def get_style_property(self, style_property) -> Optional:
def set_cursor_at(self, row: int, indent: Optional[int] = None):
"""Set cursor position and initialize a new line if necessary"""

# Remove current line if empty (useless)
if self._current_line is not None and self._current_line.is_empty():
del self._caption_lines[self._current_line.get_row()]
if self._caption_lines.get(self._current_line.get_row()) is not None:
# Set current line if necessary
if self._caption_lines.get(self._current_line.get_row()) is not self._current_line:
self._current_line = self._caption_lines.get(self._current_line.get_row())

# Remove current line if empty (i.e. useless)
if self._current_line.is_empty():
del self._caption_lines[self._current_line.get_row()]

self._cursor = (row, indent if indent is not None else 0)

Expand Down Expand Up @@ -198,7 +208,11 @@ def get_lines(self) -> Dict[int, SccCaptionLine]:

def is_empty(self) -> bool:
"""Returns whether the paragraph has no content"""
return not self._caption_lines
return self._get_length() == 0

def _get_length(self) -> int:
"""Returns the total length of contained text"""
return sum([line.get_length() for line in self._caption_lines.values()])

def copy_lines(self) -> Dict[int, SccCaptionLine]:
"""Copy paragraph lines (without time attributes)"""
Expand All @@ -217,10 +231,6 @@ def copy_lines(self) -> Dict[int, SccCaptionLine]:

def new_caption_text(self):
"""Appends a new caption text content, and keeps reference on it"""
if self._current_line is None:
LOGGER.warning("Add a new caption line to add new caption text")
self.new_caption_line()

self._current_line.add_text(SccCaptionText())

def new_caption_line(self):
Expand All @@ -245,7 +255,7 @@ def roll_up(self):

def get_origin(self) -> CoordinateType:
"""Computes and returns the current paragraph origin, based on its content"""
if len(self._caption_lines) > 0:
if not self.is_empty():
x_offsets = [text.get_indent() for text in self._caption_lines.values()]
y_offsets = [text.get_row() - 1 for text in self._caption_lines.values()]

Expand All @@ -255,7 +265,7 @@ def get_origin(self) -> CoordinateType:

def get_extent(self) -> ExtentType:
"""Computes and returns the current paragraph extent, based on its content"""
if len(self._caption_lines) == 0:
if self.is_empty():
return get_extent_from_dimensions(0, 0)

paragraph_rows = self._caption_lines.keys()
Expand Down
5 changes: 5 additions & 0 deletions src/main/python/ttconv/scc/caption_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,8 @@ class SccCaptionStyle(Enum):
# - EDM (to erase the displayed caption, optional)
# - EOC (to display the current caption)
PopOn = 3

@staticmethod
def default() -> "SccCaptionStyle":
    """Returns the caption style used by default, i.e. Pop-On"""
    return SccCaptionStyle.PopOn
26 changes: 13 additions & 13 deletions src/main/python/ttconv/scc/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,12 @@ def __init__(self, safe_area_x_offset: int, safe_area_y_offset: int, config: Opt
self.previous_word: Optional[SccWord] = None
self.previous_word_type: Optional[Type] = None

# Caption style (Pop-on, Roll-up, Paint-on) currently processed
self.current_style = SccCaptionStyle.default()
# Buffered caption being built
self.buffered_caption = None
# Captions being displayed
self.active_caption: Optional[SccCaptionParagraph] = None
# Caption style (Pop-on, Roll-up, Paint-on) currently processed
self.current_style = SccCaptionStyle.Unknown

# Roll-up caption number of lines
self.roll_up_depth: int = 0
Expand All @@ -97,7 +97,7 @@ def new_active_caption(self, begin_time_code: SmpteTimeCode, caption_style: SccC

def new_buffered_caption(self):
"""Resets buffered caption"""
self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset)
self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.PopOn)

def get_caption_to_process(self) -> Optional[SccCaptionParagraph]:
"""Returns the caption currently being processed"""
Expand Down Expand Up @@ -204,9 +204,6 @@ def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code:

self.active_caption.set_cursor_at(pac_row, pac_indent)

if self.active_caption.get_current_text() is None:
self.active_caption.new_caption_text()

elif self.current_style is SccCaptionStyle.RollUp:

if not self.has_active_caption():
Expand Down Expand Up @@ -299,11 +296,11 @@ def process_attribute_code(self, attribute_code: SccAttributeCode):

processed_caption = self.get_caption_to_process()

if processed_caption is None or processed_caption.get_current_text() is None:
if processed_caption is None:
LOGGER.warning("No current SCC caption nor content initialized")
return

if processed_caption.get_current_text() is not None and processed_caption.get_current_text().get_text():
if processed_caption.get_current_text().get_text():
processed_caption.new_caption_text()

if attribute_code.is_background():
Expand All @@ -321,11 +318,6 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim
# Start a new Pop-On caption
self.current_style = SccCaptionStyle.PopOn

if self.buffered_caption.get_caption_style() is SccCaptionStyle.Unknown:
self.buffered_caption.set_caption_style(self.current_style)
self.buffered_caption.new_caption_line()
self.buffered_caption.new_caption_text()

elif control_code is SccControlCode.RDC:
# Start a new Paint-On caption
self.current_style = SccCaptionStyle.PaintOn
Expand Down Expand Up @@ -432,6 +424,10 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim
def process_text(self, word: str, time_code: SmpteTimeCode):
"""Processes SCC text words"""
if self.current_style is SccCaptionStyle.PaintOn:
if not self.has_active_caption():
LOGGER.warning("Initialize active caption buffer to handle paint-on text at %s", time_code)
self.paint_on_active_caption(time_code)

if word.startswith(" "):

if self.active_caption.get_caption_style() is not SccCaptionStyle.PaintOn:
Expand Down Expand Up @@ -464,6 +460,10 @@ def process_text(self, word: str, time_code: SmpteTimeCode):
self.active_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration)

elif self.current_style is SccCaptionStyle.RollUp:
if not self.has_active_caption():
LOGGER.warning("Initialize active caption buffer to handle roll-up text at %s", time_code)
self.new_active_caption(time_code, self.current_style)

self.active_caption.append_text(word)

self.active_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color)
Expand Down
68 changes: 68 additions & 0 deletions src/main/python/ttconv/scc/disassembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,18 @@
"""SCC disassembly functions"""
import logging

from ttconv.scc.codes.attribute_codes import SccAttributeCode
from ttconv.scc.codes.control_codes import SccControlCode
from ttconv.scc.codes.extended_characters import SccExtendedCharacter
from ttconv.scc.codes.mid_row_codes import SccMidRowCode
from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode
from ttconv.scc.codes.special_characters import SccSpecialCharacter
from ttconv.scc.word import SccWord
from ttconv.style_properties import ColorType, NamedColors, FontStyleType, TextDecorationType

LOGGER = logging.getLogger(__name__)


def get_color_disassembly(color: ColorType) -> str:
"""Get color disassembly code"""
disassembly = ""
Expand Down Expand Up @@ -81,3 +89,63 @@ def get_text_decoration_disassembly(text_decoration: TextDecorationType) -> str:
if text_decoration is not None and text_decoration.underline is True:
return "U"
return ""


def get_scc_word_disassembly(scc_word: SccWord) -> str:
    """Returns the disassembly code for specified SCC word.

    The word is resolved in the following order, the first match winning:
    null word, word whose first byte is 0x20 or above, preamble address code,
    attribute code, mid-row code, control code, special character, extended
    character.

    :param scc_word: the SCC word to disassemble; it is read through its
      `value`, `byte_1` and `byte_2` attributes and its `to_text()` method
    :return: `{}` for a null word, a `{...}` token for a recognized code, the
      unicode value of a special or extended character, the result of
      `to_text()` when the first byte is 0x20 or above, or `{??}` (with a
      logged warning) when no code table matches
    """
    if scc_word.value == 0x0000:
        return "{}"

    if scc_word.byte_1 >= 0x20:
        # Not handled as a code here: rely on the word's own text rendering
        return scc_word.to_text()

    # Each code table is only searched when the previous ones did not match,
    # so a word never costs more lookups than needed. The order below is the
    # matching priority.
    # NOTE(review): assumes the `find` methods are side-effect free lookups.
    pac = SccPreambleAddressCode.find(scc_word.byte_1, scc_word.byte_2)
    if pac is not None:
        disassembly_code = f"{{{pac.get_row():02}"
        color = pac.get_color()
        indent = pac.get_indent()
        if indent is not None and indent > 0:
            # A positive indent takes precedence over color and styling
            disassembly_code += f"{indent:02}"
        elif color is not None:
            disassembly_code += get_color_disassembly(color)
            disassembly_code += get_font_style_disassembly(pac.get_font_style())
            disassembly_code += get_text_decoration_disassembly(pac.get_text_decoration())
        else:
            disassembly_code += "00"
        return disassembly_code + "}"

    attribute_code = SccAttributeCode.find(scc_word.value)
    if attribute_code is not None:
        disassembly_code = "{"
        disassembly_code += "B" if attribute_code.is_background() else ""
        disassembly_code += get_color_disassembly(attribute_code.get_color())
        disassembly_code += get_text_decoration_disassembly(attribute_code.get_text_decoration())
        return disassembly_code + "}"

    mid_row_code = SccMidRowCode.find(scc_word.value)
    if mid_row_code is not None:
        disassembly_code = "{"
        disassembly_code += get_color_disassembly(mid_row_code.get_color())
        disassembly_code += get_font_style_disassembly(mid_row_code.get_font_style())
        disassembly_code += get_text_decoration_disassembly(mid_row_code.get_text_decoration())
        return disassembly_code + "}"

    control_code = SccControlCode.find(scc_word.value)
    if control_code is not None:
        return "{" + control_code.get_name() + "}"

    spec_char = SccSpecialCharacter.find(scc_word.value)
    if spec_char is not None:
        return spec_char.get_unicode_value()

    extended_char = SccExtendedCharacter.find(scc_word.value)
    if extended_char is not None:
        return extended_char.get_unicode_value()

    LOGGER.warning("Unsupported SCC word: %s", hex(scc_word.value))
    return "{??}"
Loading

0 comments on commit e749eb9

Please sign in to comment.