From 9cde0ec57fe81ff798cdded93b76f2436b3b27cf Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Wed, 9 Aug 2023 17:44:04 +0200 Subject: [PATCH 01/13] SCC: extract content.py file classes into files --- .../scc/{content.py => caption_line.py} | 85 +------------ src/main/python/ttconv/scc/caption_text.py | 114 ++++++++++++++++++ src/main/python/ttconv/scc/paragraph.py | 3 +- src/main/python/ttconv/scc/reader.py | 3 +- src/test/python/test_scc_content.py | 3 +- src/test/python/test_scc_line.py | 2 +- src/test/python/test_scc_paragraph.py | 2 +- 7 files changed, 124 insertions(+), 88 deletions(-) rename src/main/python/ttconv/scc/{content.py => caption_line.py} (71%) create mode 100644 src/main/python/ttconv/scc/caption_text.py diff --git a/src/main/python/ttconv/scc/content.py b/src/main/python/ttconv/scc/caption_line.py similarity index 71% rename from src/main/python/ttconv/scc/content.py rename to src/main/python/ttconv/scc/caption_line.py index 60a58695..b26cf27c 100644 --- a/src/main/python/ttconv/scc/content.py +++ b/src/main/python/ttconv/scc/caption_line.py @@ -23,20 +23,17 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -"""SCC caption content""" +"""SCC caption line""" from __future__ import annotations -import copy import logging from typing import Optional, List, Union -from ttconv.time_code import SmpteTimeCode +from ttconv.scc.caption_text import SccCaptionText LOGGER = logging.getLogger(__name__) -ROLL_UP_BASE_ROW = 15 - class SccCaptionLine: """Caption paragraph line""" @@ -178,81 +175,3 @@ def get_trailing_spaces(self) -> int: def __repr__(self): return "<" + self.__class__.__name__ + " " + str(self.__dict__) + ">" - - -class SccCaptionText: - """Caption text content with specific positional, temporal and styling attributes""" - - def __init__(self, text: Optional[str] = ""): - self._begin: Optional[SmpteTimeCode] = None - self._end: Optional[SmpteTimeCode] = None - self._style_properties = {} - self._text: str = "" - self._cursor = 0 # Cursor in the text - - if text is not None and text != "": - self.append(text) - - def set_begin(self, time_code: SmpteTimeCode): - """Sets begin time code""" - self._begin = copy.copy(time_code) - - def get_begin(self) -> SmpteTimeCode: - """Returns the begin time code""" - return self._begin - - def set_end(self, time_code: SmpteTimeCode): - """Sets end time code""" - self._end = copy.copy(time_code) - - def get_end(self) -> SmpteTimeCode: - """Returns the end time code""" - return self._end - - def get_text(self) -> str: - """Returns the text""" - return self._text - - def get_length(self) -> int: - """Returns text length""" - return len(self._text) - - def is_empty(self) -> bool: - """Returns whether the text is empty or not""" - return self.get_length() == 0 - - def append(self, text: str): - """Add or replace text content at cursor position""" - # print("Append text: ", text, "to", self._text, "at", self._cursor) - self._text = self._text[:self._cursor] + text + self._text[(self._cursor + len(text)):] - self._cursor += len(text) - # print("\t=>", self._text, ", cursor:", self._cursor) - - def set_cursor_at(self, position: int): - """Set text cursor position""" - self._cursor = position - - def get_cursor(self) -> int: - """Returns the cursor position""" - return self._cursor - - def backspace(self): - """Remove last character""" - self._text = self._text[:-1] - - def get_style_properties(self) -> dict: - """Sets the style properties map""" - return self._style_properties - - def add_style_property(self, style_property, value): - """Adds a style property""" - if value is None: - return - self._style_properties[style_property] = value - - def has_same_style_properties(self, other): - """Returns whether the current text has the same style properties as the other text""" - return self._style_properties == other.get_style_properties() - - def __repr__(self): - return "<" + self.__class__.__name__ + " " + str(self.__dict__) + ">" diff --git a/src/main/python/ttconv/scc/caption_text.py b/src/main/python/ttconv/scc/caption_text.py new file mode 100644 index 00000000..cc31da4a --- /dev/null +++ b/src/main/python/ttconv/scc/caption_text.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +# Copyright (c) 2020, Sandflow Consulting LLC +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SCC caption text""" + +from __future__ import annotations + +import logging +import copy +from typing import Optional + +from ttconv.time_code import SmpteTimeCode + +LOGGER = logging.getLogger(__name__) + + +class SccCaptionText: + """Caption text content with specific positional, temporal and styling attributes""" + + def __init__(self, text: Optional[str] = ""): + self._begin: Optional[SmpteTimeCode] = None + self._end: Optional[SmpteTimeCode] = None + self._style_properties = {} + self._text: str = "" + self._cursor = 0 # Cursor in the text + + if text is not None and text != "": + self.append(text) + + def set_begin(self, time_code: SmpteTimeCode): + """Sets begin time code""" + self._begin = copy.copy(time_code) + + def get_begin(self) -> SmpteTimeCode: + """Returns the begin time code""" + return self._begin + + def set_end(self, time_code: SmpteTimeCode): + """Sets end time code""" + self._end = copy.copy(time_code) + + def get_end(self) -> SmpteTimeCode: + """Returns the end time code""" + return self._end + + def get_text(self) -> str: + """Returns the text""" + return self._text + + def get_length(self) -> int: + """Returns text length""" + return len(self._text) + + def is_empty(self) -> bool: + """Returns whether the text is empty or not""" + return self.get_length() == 0 + + def append(self, text: str): + """Add or replace text content at cursor position""" + # print("Append text: ", text, "to", self._text, "at", self._cursor) + self._text = self._text[:self._cursor] + text + self._text[(self._cursor + len(text)):] + self._cursor += len(text) + # print("\t=>", self._text, ", cursor:", self._cursor) + + def set_cursor_at(self, position: int): + """Set text cursor position""" + self._cursor = position + + def get_cursor(self) -> int: + """Returns the cursor position""" + return self._cursor + + def backspace(self): + """Remove last character""" + self._text = self._text[:-1] + + def get_style_properties(self) -> dict: + """Sets the style properties map""" + return self._style_properties + + def add_style_property(self, style_property, value): + """Adds a style property""" + if value is None: + return + self._style_properties[style_property] = value + + def has_same_style_properties(self, other): + """Returns whether the current text has the same style properties as the other text""" + return self._style_properties == other.get_style_properties() + + def __repr__(self): + return "<" + self.__class__.__name__ + " " + str(self.__dict__) + ">" diff --git a/src/main/python/ttconv/scc/paragraph.py b/src/main/python/ttconv/scc/paragraph.py index d556e54b..5479e497 100644 --- a/src/main/python/ttconv/scc/paragraph.py +++ b/src/main/python/ttconv/scc/paragraph.py @@ -32,7 +32,8 @@ from typing import Optional, List, Dict, Union from ttconv.model import Region, ContentDocument, P, Br, Span, Text -from ttconv.scc.content import SccCaptionText, SccCaptionLine +from ttconv.scc.caption_line import SccCaptionLine +from ttconv.scc.caption_text import SccCaptionText from ttconv.scc.style import SccCaptionStyle from ttconv.scc.utils import get_position_from_offsets, get_extent_from_dimensions, convert_cells_to_percentages from ttconv.style_properties import CoordinateType, ExtentType, StyleProperties, LengthType, DisplayAlignType, ShowBackgroundType, \ diff --git a/src/main/python/ttconv/scc/reader.py b/src/main/python/ttconv/scc/reader.py index f4f1ccad..4ab2bb62 100644 --- a/src/main/python/ttconv/scc/reader.py +++ b/src/main/python/ttconv/scc/reader.py @@ -38,7 +38,6 @@ from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode from ttconv.scc.codes.special_characters import SccSpecialCharacter, SccExtendedCharacter from ttconv.scc.config import SccReaderConfiguration, TextAlignment -from ttconv.scc.content import ROLL_UP_BASE_ROW from ttconv.scc.line import SccLine from ttconv.scc.paragraph import SccCaptionParagraph, SCC_SAFE_AREA_CELL_RESOLUTION_ROWS, SCC_SAFE_AREA_CELL_RESOLUTION_COLUMNS, \ SCC_ROOT_CELL_RESOLUTION_ROWS, SCC_ROOT_CELL_RESOLUTION_COLUMNS @@ -49,6 +48,8 @@ LOGGER = logging.getLogger(__name__) +ROLL_UP_BASE_ROW = 15 + class _SccContext: def __init__(self, config: Optional[SccReaderConfiguration] = None): diff --git a/src/test/python/test_scc_content.py b/src/test/python/test_scc_content.py index f2f6c6e4..2de0cd93 100644 --- a/src/test/python/test_scc_content.py +++ b/src/test/python/test_scc_content.py @@ -29,7 +29,8 @@ import unittest -from ttconv.scc.content import SccCaptionText, SccCaptionLine +from ttconv.scc.caption_line import SccCaptionLine +from ttconv.scc.caption_text import SccCaptionText from ttconv.style_properties import StyleProperties, NamedColors diff --git a/src/test/python/test_scc_line.py b/src/test/python/test_scc_line.py index c1a0d4fb..f38afbf6 100644 --- a/src/test/python/test_scc_line.py +++ b/src/test/python/test_scc_line.py @@ -29,7 +29,7 @@ import unittest from ttconv.scc.line import SccLine -from ttconv.scc.style import SccCaptionStyle +from ttconv.scc.caption_style import SccCaptionStyle from ttconv.time_code import SmpteTimeCode, FPS_30 diff --git a/src/test/python/test_scc_paragraph.py b/src/test/python/test_scc_paragraph.py index 298d6e34..2599b4a2 100644 --- a/src/test/python/test_scc_paragraph.py +++ b/src/test/python/test_scc_paragraph.py @@ -31,9 +31,9 @@ from fractions import Fraction from ttconv.model import ContentDocument, Span, Br -from ttconv.scc.content import SccCaptionLine from ttconv.scc.paragraph import SccCaptionParagraph from ttconv.scc.style import SccCaptionStyle +from ttconv.scc.caption_line import SccCaptionLine from ttconv.style_properties import TextAlignType from ttconv.time_code import SmpteTimeCode, FPS_30 From b12edf6fc2dfae9f8bf9c8085f9b36071b80305e Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Wed, 9 Aug 2023 17:55:25 +0200 Subject: [PATCH 02/13] SCC: rename paragraph.py file to caption_paragraph.py --- .../python/ttconv/scc/{paragraph.py => caption_paragraph.py} | 0 src/main/python/ttconv/scc/reader.py | 2 +- src/test/python/test_scc_paragraph.py | 2 +- src/test/python/test_scc_region.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename src/main/python/ttconv/scc/{paragraph.py => caption_paragraph.py} (100%) diff --git a/src/main/python/ttconv/scc/paragraph.py b/src/main/python/ttconv/scc/caption_paragraph.py similarity index 100% rename from src/main/python/ttconv/scc/paragraph.py rename to src/main/python/ttconv/scc/caption_paragraph.py diff --git a/src/main/python/ttconv/scc/reader.py b/src/main/python/ttconv/scc/reader.py index 4ab2bb62..71c8cabf 100644 --- a/src/main/python/ttconv/scc/reader.py +++ b/src/main/python/ttconv/scc/reader.py @@ -39,7 +39,7 @@ from ttconv.scc.codes.special_characters import SccSpecialCharacter, SccExtendedCharacter from ttconv.scc.config import SccReaderConfiguration, TextAlignment from ttconv.scc.line import SccLine -from ttconv.scc.paragraph import SccCaptionParagraph, SCC_SAFE_AREA_CELL_RESOLUTION_ROWS, SCC_SAFE_AREA_CELL_RESOLUTION_COLUMNS, \ +from ttconv.scc.caption_paragraph import SccCaptionParagraph, SCC_SAFE_AREA_CELL_RESOLUTION_ROWS, SCC_SAFE_AREA_CELL_RESOLUTION_COLUMNS, \ SCC_ROOT_CELL_RESOLUTION_ROWS, SCC_ROOT_CELL_RESOLUTION_COLUMNS from ttconv.scc.style import SccCaptionStyle from ttconv.scc.word import SccWord diff --git a/src/test/python/test_scc_paragraph.py b/src/test/python/test_scc_paragraph.py index 2599b4a2..fa22f0ba 100644 --- a/src/test/python/test_scc_paragraph.py +++ b/src/test/python/test_scc_paragraph.py @@ -31,7 +31,7 @@ from fractions import Fraction from ttconv.model import ContentDocument, Span, Br -from ttconv.scc.paragraph import SccCaptionParagraph +from ttconv.scc.caption_paragraph import SccCaptionParagraph from ttconv.scc.style import SccCaptionStyle from ttconv.scc.caption_line import SccCaptionLine from ttconv.style_properties import TextAlignType diff --git a/src/test/python/test_scc_region.py b/src/test/python/test_scc_region.py index 0ef45887..79d63fa3 100644 --- a/src/test/python/test_scc_region.py +++ b/src/test/python/test_scc_region.py @@ -30,7 +30,7 @@ import unittest from ttconv.model import ContentDocument, CellResolutionType -from ttconv.scc.paragraph import SccCaptionParagraph, _SccParagraphRegion +from ttconv.scc.caption_paragraph import SccCaptionParagraph, _SccParagraphRegion from ttconv.scc.style import SccCaptionStyle from ttconv.style_properties import StyleProperties, ShowBackgroundType From a89ff365c38d2c5e0c782c2575427deffc88a798 Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Wed, 16 Aug 2023 11:20:31 +0200 Subject: [PATCH 03/13] SCC: rename style.py file to caption_style.py --- src/main/python/ttconv/scc/caption_paragraph.py | 2 +- src/main/python/ttconv/scc/{style.py => caption_style.py} | 0 src/main/python/ttconv/scc/line.py | 2 +- src/main/python/ttconv/scc/reader.py | 2 +- src/test/python/test_scc_paragraph.py | 2 +- src/test/python/test_scc_region.py | 2 +- 6 files changed, 5 insertions(+), 5 deletions(-) rename src/main/python/ttconv/scc/{style.py => caption_style.py} (100%) diff --git a/src/main/python/ttconv/scc/caption_paragraph.py b/src/main/python/ttconv/scc/caption_paragraph.py index 5479e497..ecef20a3 100644 --- a/src/main/python/ttconv/scc/caption_paragraph.py +++ b/src/main/python/ttconv/scc/caption_paragraph.py @@ -34,7 +34,7 @@ from ttconv.model import Region, ContentDocument, P, Br, Span, Text from ttconv.scc.caption_line import SccCaptionLine from ttconv.scc.caption_text import SccCaptionText -from ttconv.scc.style import SccCaptionStyle +from ttconv.scc.caption_style import SccCaptionStyle from ttconv.scc.utils import get_position_from_offsets, get_extent_from_dimensions, convert_cells_to_percentages from ttconv.style_properties import CoordinateType, ExtentType, StyleProperties, LengthType, DisplayAlignType, ShowBackgroundType, \ TextAlignType, NamedColors diff --git a/src/main/python/ttconv/scc/style.py b/src/main/python/ttconv/scc/caption_style.py similarity index 100% rename from src/main/python/ttconv/scc/style.py rename to src/main/python/ttconv/scc/caption_style.py diff --git a/src/main/python/ttconv/scc/line.py b/src/main/python/ttconv/scc/line.py index 34a3599f..2b65cd17 100644 --- a/src/main/python/ttconv/scc/line.py +++ b/src/main/python/ttconv/scc/line.py @@ -37,7 +37,7 @@ from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode from ttconv.scc.codes.special_characters import SccSpecialCharacter, SccExtendedCharacter from ttconv.scc.disassembly import get_color_disassembly, get_font_style_disassembly, get_text_decoration_disassembly -from ttconv.scc.style import SccCaptionStyle +from ttconv.scc.caption_style import SccCaptionStyle from ttconv.scc.word import SccWord from ttconv.time_code import SmpteTimeCode, FPS_30 diff --git a/src/main/python/ttconv/scc/reader.py b/src/main/python/ttconv/scc/reader.py index 71c8cabf..6dd4b8e3 100644 --- a/src/main/python/ttconv/scc/reader.py +++ b/src/main/python/ttconv/scc/reader.py @@ -41,7 +41,7 @@ from ttconv.scc.line import SccLine from ttconv.scc.caption_paragraph import SccCaptionParagraph, SCC_SAFE_AREA_CELL_RESOLUTION_ROWS, SCC_SAFE_AREA_CELL_RESOLUTION_COLUMNS, \ SCC_ROOT_CELL_RESOLUTION_ROWS, SCC_ROOT_CELL_RESOLUTION_COLUMNS -from ttconv.scc.style import SccCaptionStyle +from ttconv.scc.caption_style import SccCaptionStyle from ttconv.scc.word import SccWord from ttconv.style_properties import StyleProperties, LengthType, GenericFontFamilyType from ttconv.time_code import SmpteTimeCode diff --git a/src/test/python/test_scc_paragraph.py b/src/test/python/test_scc_paragraph.py index fa22f0ba..f3e51f3c 100644 --- a/src/test/python/test_scc_paragraph.py +++ b/src/test/python/test_scc_paragraph.py @@ -32,7 +32,7 @@ from ttconv.model import ContentDocument, Span, Br from ttconv.scc.caption_paragraph import SccCaptionParagraph -from ttconv.scc.style import SccCaptionStyle +from ttconv.scc.caption_style import SccCaptionStyle from ttconv.scc.caption_line import SccCaptionLine from ttconv.style_properties import TextAlignType from ttconv.time_code import SmpteTimeCode, FPS_30 diff --git a/src/test/python/test_scc_region.py b/src/test/python/test_scc_region.py index 79d63fa3..b84dc63f 100644 --- a/src/test/python/test_scc_region.py +++ b/src/test/python/test_scc_region.py @@ -31,7 +31,7 @@ from ttconv.model import ContentDocument, CellResolutionType from ttconv.scc.caption_paragraph import SccCaptionParagraph, _SccParagraphRegion -from ttconv.scc.style import SccCaptionStyle +from ttconv.scc.caption_style import SccCaptionStyle from ttconv.style_properties import StyleProperties, ShowBackgroundType From 5e42cecaf3f11fc3b2492ede6996168726f0bed6 Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Wed, 16 Aug 2023 11:22:54 +0200 Subject: [PATCH 04/13] SCC: SccWord refactoring for normal characters word repetition support Alternative to https://github.com/sandflow/ttconv/pull/392 --- src/main/python/ttconv/scc/reader.py | 84 +++++++++++++--------------- src/main/python/ttconv/scc/word.py | 39 +++++++++---- src/test/python/test_scc_reader.py | 28 ++++++++++ src/test/python/test_scc_word.py | 35 ++++++++++++ 4 files changed, 130 insertions(+), 56 deletions(-) diff --git a/src/main/python/ttconv/scc/reader.py b/src/main/python/ttconv/scc/reader.py index 6dd4b8e3..3f183af7 100644 --- a/src/main/python/ttconv/scc/reader.py +++ b/src/main/python/ttconv/scc/reader.py @@ -29,7 +29,7 @@ import copy import logging -from typing import Optional, Tuple +from typing import Optional, Tuple, Type from ttconv.model import ContentDocument, Body, Div, CellResolutionType, ActiveAreaType from ttconv.scc.codes.attribute_codes import SccAttributeCode @@ -39,8 +39,8 @@ from ttconv.scc.codes.special_characters import SccSpecialCharacter, SccExtendedCharacter from ttconv.scc.config import SccReaderConfiguration, TextAlignment from ttconv.scc.line import SccLine -from ttconv.scc.caption_paragraph import SccCaptionParagraph, SCC_SAFE_AREA_CELL_RESOLUTION_ROWS, SCC_SAFE_AREA_CELL_RESOLUTION_COLUMNS, \ - SCC_ROOT_CELL_RESOLUTION_ROWS, SCC_ROOT_CELL_RESOLUTION_COLUMNS +from ttconv.scc.caption_paragraph import SccCaptionParagraph, SCC_SAFE_AREA_CELL_RESOLUTION_ROWS, \ + SCC_SAFE_AREA_CELL_RESOLUTION_COLUMNS, SCC_ROOT_CELL_RESOLUTION_ROWS, SCC_ROOT_CELL_RESOLUTION_COLUMNS from ttconv.scc.caption_style import SccCaptionStyle from ttconv.scc.word import SccWord from ttconv.style_properties import StyleProperties, LengthType, GenericFontFamilyType @@ -64,8 +64,8 @@ def __init__(self, config: Optional[SccReaderConfiguration] = None): self.safe_area_y_offset: int = 0 # Previously read SCC word value - self.previous_word: SccWord = None - self.previous_code_type = None + self.previous_word: Optional[SccWord] = None + self.previous_word_type: Optional[Type] = None # Buffered caption being built self.buffered_caption: Optional[SccCaptionParagraph] = None @@ -250,7 +250,7 @@ def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTime font_style = mid_row_code.get_font_style() text_decoration = mid_row_code.get_text_decoration() - if self.previous_code_type is not SccMidRowCode: + if self.previous_word_type is not SccMidRowCode: # In case of multiple mid-row codes, move right only after the first code # If there is already text on the current line @@ -483,7 +483,7 @@ def process_line(self, line: SccLine) -> SmpteTimeCode: for scc_word in line.scc_words: - if self.previous_word is not None and self.previous_word.value == scc_word.value and self.previous_word.is_control_code(): + if self.previous_word is not None and self.previous_word.value == scc_word.value and self.previous_word.is_code(): self.previous_word = None continue @@ -494,68 +494,62 @@ def process_line(self, line: SccLine) -> SmpteTimeCode: if scc_word.byte_1 < 0x20: - control_code = SccControlCode.find(scc_word.value) - attribute_code = SccAttributeCode.find(scc_word.value) - mid_row_code = SccMidRowCode.find(scc_word.value) - pac = SccPreambleAddressCode.find(scc_word.byte_1, scc_word.byte_2) - spec_char = SccSpecialCharacter.find(scc_word.value) - extended_char = SccExtendedCharacter.find(scc_word.value) - - if pac is not None: - debug += "[PAC|" + str(pac.get_row()) + "|" + str(pac.get_indent()) - if pac.get_color() is not None: - debug += "|" + str(pac.get_color()) - if pac.get_font_style() is not None: + scc_code = scc_word.get_code() + + if isinstance(scc_code, SccPreambleAddressCode): + debug += "[PAC|" + str(scc_code.get_row()) + "|" + str(scc_code.get_indent()) + if scc_code.get_color() is not None: + debug += "|" + str(scc_code.get_color()) + if scc_code.get_font_style() is not None: debug += "|I" - if pac.get_text_decoration() is not None: + if scc_code.get_text_decoration() is not None: debug += "|U" debug += "/" + hex(scc_word.value) + "]" - self.process_preamble_address_code(pac, line.time_code) - self.previous_code_type = type(pac) + self.process_preamble_address_code(scc_code, line.time_code) + self.previous_word_type = type(scc_code) - elif attribute_code is not None: + elif isinstance(scc_code, SccAttributeCode): debug += "[ATC/" + hex(scc_word.value) + "]" - self.process_attribute_code(attribute_code) - self.previous_code_type = type(attribute_code) - - elif mid_row_code is not None: - debug += "[MRC|" + mid_row_code.get_name() + "/" + hex(scc_word.value) + "]" - self.process_mid_row_code(mid_row_code, line.time_code) - self.previous_code_type = type(mid_row_code) + self.process_attribute_code(scc_code) + self.previous_word_type = type(scc_code) - elif control_code is not None: - debug += "[CC|" + control_code.get_name() + "/" + hex(scc_word.value) + "]" - self.process_control_code(control_code, line.time_code) - self.previous_code_type = type(control_code) + elif isinstance(scc_code, SccMidRowCode): + debug += "[MRC|" + scc_code.get_name() + "/" + hex(scc_word.value) + "]" + self.process_mid_row_code(scc_code, line.time_code) + self.previous_word_type = type(scc_code) + elif isinstance(scc_code, SccControlCode): + debug += "[CC|" + scc_code.get_name() + "/" + hex(scc_word.value) + "]" + self.process_control_code(scc_code, line.time_code) + self.previous_word_type = type(scc_code) - elif spec_char is not None: - word = spec_char.get_unicode_value() + elif isinstance(scc_code, SccSpecialCharacter): + word = scc_code.get_unicode_value() debug += word self.process_text(word, line.time_code) - self.previous_code_type = type(spec_char) + self.previous_word_type = type(scc_code) - elif extended_char is not None: + elif isinstance(scc_code, SccExtendedCharacter): if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): self.active_caption.get_current_text().backspace() else: self.buffered_caption.get_current_text().backspace() - word = extended_char.get_unicode_value() + word = scc_code.get_unicode_value() debug += word self.process_text(word, line.time_code) - self.previous_code_type = type(extended_char) + self.previous_word_type = type(scc_code) else: debug += "[??/" + hex(scc_word.value) + "]" LOGGER.warning("Unsupported SCC word: %s", hex(scc_word.value)) - self.previous_code_type = None + self.previous_word_type = None else: - word = scc_word.to_text() - debug += word - self.process_text(word, line.time_code) - self.previous_code_type = str + text = scc_word.to_text() + debug += text + self.process_text(text, line.time_code) + self.previous_word_type = str self.previous_word = scc_word diff --git a/src/main/python/ttconv/scc/word.py b/src/main/python/ttconv/scc/word.py index 8ca86f20..84f70c06 100644 --- a/src/main/python/ttconv/scc/word.py +++ b/src/main/python/ttconv/scc/word.py @@ -27,6 +27,14 @@ from __future__ import annotations +from typing import Optional + +from ttconv.scc.codes import SccCode +from ttconv.scc.codes.attribute_codes import SccAttributeCode +from ttconv.scc.codes.control_codes import SccControlCode +from ttconv.scc.codes.mid_row_codes import SccMidRowCode +from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode +from ttconv.scc.codes.special_characters import SccSpecialCharacter, SccExtendedCharacter from ttconv.scc.codes.standard_characters import SCC_STANDARD_CHARACTERS_MAPPING PARITY_BIT_MASK = 0b01111111 @@ -35,10 +43,10 @@ class SccWord: """SCC hexadecimal word definition""" - def __init__(self): - self.value = None - self.byte_1 = None - self.byte_2 = None + def __init__(self, byte_1: int, byte_2: int): + self.byte_1 = byte_1 + self.byte_2 = byte_2 + self.value = byte_1 * 0x100 + byte_2 @staticmethod def _is_hex_word(word: str) -> bool: @@ -69,12 +77,10 @@ def from_bytes(byte_1: int, byte_2: int) -> SccWord: """Creates a SCC word from the specified bytes""" if byte_1 > 0xFF or byte_2 > 0xFF: raise ValueError(f"Expected two 1-byte int values, instead got {hex(byte_1)} and {hex(byte_2)}") - scc_word = SccWord() - scc_word.byte_1 = SccWord._decipher_parity_bit(byte_1) - scc_word.byte_2 = SccWord._decipher_parity_bit(byte_2) - scc_word.value = scc_word.byte_1 * 0x100 + scc_word.byte_2 + byte_1 = SccWord._decipher_parity_bit(byte_1) + byte_2 = SccWord._decipher_parity_bit(byte_2) - return scc_word + return SccWord(byte_1, byte_2) @staticmethod def from_str(hex_word: str) -> SccWord: @@ -89,7 +95,18 @@ def to_text(self) -> str: """Converts SCC word to text""" return ''.join(SCC_STANDARD_CHARACTERS_MAPPING.get(byte, chr(byte)) for byte in [self.byte_1, self.byte_2] if byte != 0x00) - def is_control_code(self) -> bool: - """Returns true if the word is a control code, i.e. the first byte + def get_code(self) -> Optional[SccCode]: + """Find corresponding code""" + if self.is_code(): + return SccControlCode.find(self.value) or \ + SccAttributeCode.find(self.value) or \ + SccMidRowCode.find(self.value) or \ + SccPreambleAddressCode.find(self.byte_1, self.byte_2) or \ + SccSpecialCharacter.find(self.value) or \ + SccExtendedCharacter.find(self.value) + return None + + def is_code(self) -> bool: + """Returns true if the word is an SCC code, i.e. the first byte is a non-printing character in the range 10h to 1Fh.""" return 0x10 <= self.byte_1 <= 0x1F diff --git a/src/test/python/test_scc_reader.py b/src/test/python/test_scc_reader.py index a2cfbac0..5eb25eb0 100644 --- a/src/test/python/test_scc_reader.py +++ b/src/test/python/test_scc_reader.py @@ -250,6 +250,34 @@ def test_scc_pop_on_content_unexpectedly_ended(self): "consectetur adipiscing elit.") self.assertEqual(region_1, p_list[0].get_region()) + def test_scc_double_word_in_content(self): + scc_content = """"Scenarist_SCC V1.0 +01:02:53:14 9420 9420 94AE 94AE 9452 9452 97A1 97A1 20F2 E56D E56D 62E5 F220 9137 9137 9137 9137 942F 942F +01:02:55:14 942c 942c +""" + scc_disassembly = """\ +01:02:53:14 {RCL}{RCL}{ENM}{ENM}{1404}{1404}{TO1}{TO1} remember ♪♪♪♪{EOC}{EOC} +""" + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + first_span = p_list[0][0] + first_text = first_span[0].get_text() + + self.assertEqual(" remember ♪♪", first_text) + def test_2_rows_roll_up_content(self): scc_content = """\ Scenarist_SCC V1.0 diff --git a/src/test/python/test_scc_word.py b/src/test/python/test_scc_word.py index dcec6e4b..4931b1b3 100644 --- a/src/test/python/test_scc_word.py +++ b/src/test/python/test_scc_word.py @@ -29,6 +29,10 @@ import unittest +from ttconv.scc.codes.control_codes import SccControlCode +from ttconv.scc.codes.mid_row_codes import SccMidRowCode +from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode +from ttconv.scc.codes.special_characters import SccExtendedCharacter from ttconv.scc.word import SccWord @@ -101,3 +105,34 @@ def test_scc_word_to_text(self): self.assertEqual('\x01', scc_word.to_text()) self.assertRaises(ValueError, SccWord.from_value, 0x01020304) + + def test_scc_word_get_code(self): + self.assertEqual(SccControlCode.RCL, SccWord.from_str("9420").get_code()) + self.assertEqual(SccMidRowCode.ITALICS, SccWord.from_str("91ae").get_code()) + self.assertEqual(SccControlCode.BS, SccWord.from_str("9421").get_code()) + self.assertEqual(None, SccWord.from_str("4c6f").get_code()) # "Lo" + self.assertEqual(None, SccWord.from_str("7265").get_code()) # "re" + self.assertEqual(None, SccWord.from_str("6d20").get_code()) # "m " + self.assertEqual(None, SccWord.from_str("6970").get_code()) # "ip" + self.assertEqual(None, SccWord.from_str("7375").get_code()) # "su" + self.assertEqual(None, SccWord.from_str("6d20").get_code()) # "m " + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_ACUTE, SccWord.from_str("9220").get_code()) + self.assertEqual(SccControlCode.EDM, SccWord.from_str("942c").get_code()) + self.assertEqual(SccControlCode.EOC, SccWord.from_str("942f").get_code()) + self.assertEqual(SccControlCode.RU2, SccWord.from_str("9425").get_code()) + self.assertEqual(SccControlCode.CR, SccWord.from_str("94ad").get_code()) + self.assertEqual(SccPreambleAddressCode, SccWord.from_str("9673").get_code().__class__) + self.assertEqual(None, SccWord.from_str("636f").get_code()) # "co" + self.assertEqual(None, SccWord.from_str("6e73").get_code()) # "ns" + self.assertEqual(None, SccWord.from_str("6563").get_code()) # "ec" + self.assertEqual(None, SccWord.from_str("7465").get_code()) # "te" + self.assertEqual(None, SccWord.from_str("7475").get_code()) # "tu" + self.assertEqual(None, SccWord.from_str("7220").get_code()) # "r " + self.assertEqual(None, SccWord.from_str("6164").get_code()) # "ad" + self.assertEqual(None, SccWord.from_str("6970").get_code()) # "ip" + self.assertEqual(None, SccWord.from_str("6973").get_code()) # "is" + self.assertEqual(None, SccWord.from_str("6369").get_code()) # "ci" + self.assertEqual(None, SccWord.from_str("6e67").get_code()) # "ng" + self.assertEqual(None, SccWord.from_str("2065").get_code()) # " e" + self.assertEqual(None, SccWord.from_str("6c69").get_code()) # "li" + self.assertEqual(None, SccWord.from_str("742e").get_code()) # "t." From 4222b0611414a2f8a978ef9ecf247e1e5eb81722 Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Thu, 10 Aug 2023 11:38:55 +0200 Subject: [PATCH 05/13] SCC: extract SccContext to specific file and refactor SccLine processing function --- src/main/python/ttconv/scc/context.py | 473 +++++++++++++++++++++++ src/main/python/ttconv/scc/line.py | 84 +++- src/main/python/ttconv/scc/reader.py | 531 +------------------------- 3 files changed, 563 insertions(+), 525 deletions(-) create mode 100644 src/main/python/ttconv/scc/context.py diff --git a/src/main/python/ttconv/scc/context.py b/src/main/python/ttconv/scc/context.py new file mode 100644 index 00000000..f2cc8327 --- /dev/null +++ b/src/main/python/ttconv/scc/context.py @@ -0,0 +1,473 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +# Copyright (c) 2020, Sandflow Consulting LLC +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SCC context""" + +from __future__ import annotations + +import copy +from typing import Optional, Type, Tuple + +from ttconv.model import Div +from ttconv.scc.caption_paragraph import SccCaptionParagraph +from ttconv.scc.caption_style import SccCaptionStyle +from ttconv.scc.codes.attribute_codes import SccAttributeCode +from ttconv.scc.codes.control_codes import SccControlCode +from ttconv.scc.codes.mid_row_codes import SccMidRowCode +from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode +from ttconv.scc.config import SccReaderConfiguration, TextAlignment +from ttconv.scc.word import SccWord +from ttconv.style_properties import StyleProperties +from ttconv.time_code import SmpteTimeCode + +ROLL_UP_BASE_ROW = 15 + + +class SccContext: + """SCC context for reader""" + + def __init__(self, config: Optional[SccReaderConfiguration] = None): + # Caption paragraphs container + self.div: Optional[Div] = None + + # Caption paragraphs counter + self.count: int = 0 + + # Screen safe area offsets + self.safe_area_x_offset: int = 0 + self.safe_area_y_offset: int = 0 + + # Previously read SCC word value + self.previous_word: Optional[SccWord] = None + self.previous_word_type: Optional[Type] = None + + # Buffered caption being built + self.buffered_caption: Optional[SccCaptionParagraph] = None + # Captions being displayed + self.active_caption: Optional[SccCaptionParagraph] = None + # Caption style (Pop-on, Roll-up, Paint-on) currently processed + self.current_style: Optional[SccCaptionStyle] = None + + # Roll-up caption number of lines + self.roll_up_depth: int = 0 + + # Cursor position in the active area + self.active_cursor: Tuple[int, int] = (0, 0) + + self.current_text_decoration = None + self.current_color = None + self.current_font_style = None + + # Text alignment + self.text_alignment = TextAlignment.AUTO if config is None else config.text_align + + def set_safe_area(self, safe_area_x_offset: int, safe_area_y_offset: int): + """Sets the safe area""" + self.safe_area_x_offset = safe_area_x_offset + self.safe_area_y_offset = safe_area_y_offset + + def has_active_caption(self) -> bool: + """Returns whether captions are being displayed or not""" + return self.active_caption is not None + + def set_buffered_caption_begin_time(self, time_code: SmpteTimeCode): + """Initializes the current buffered caption with begin time""" + if self.buffered_caption is not None: + self.buffered_caption.set_begin(time_code) + + def initialize_active_caption(self, begin_time_code: SmpteTimeCode): + """Initializes the current active caption with id and begin time""" + if self.active_caption is not None: + if not self.active_caption.get_id(): + self.count += 1 + self.active_caption.set_id("caption" + str(self.count)) + + self.active_caption.set_begin(begin_time_code) + + def push_buffered_to_active_captions(self): + """Send the current buffered caption to the active captions list""" + if self.buffered_caption is not None and self.buffered_caption.get_current_text(): + if not self.buffered_caption.get_id(): + self.count += 1 + self.buffered_caption.set_id("caption" + str(self.count)) + + self.active_caption = self.buffered_caption + self.buffered_caption = None + + def flip_buffered_to_active_captions(self, time_code: Optional[SmpteTimeCode] = None): + """ + Flip the current buffered caption with the last active captions list, + and push to model if an end time code is specified. + """ + temporary_caption = None + + if self.has_active_caption(): + temporary_caption = self.active_caption + + if time_code is not None: + # End of display of active captions + if self.has_active_caption(): + self.push_active_caption_to_model(time_code) + + self.push_buffered_to_active_captions() + + if temporary_caption is not None: + self.buffered_caption = temporary_caption + + def push_active_caption_to_model(self, time_code: SmpteTimeCode, clear_active_caption: bool = True): + """Sets end time to the last active caption, and pushes it into the data model""" + if self.has_active_caption(): + self.active_cursor = self.active_caption.get_cursor() + + previous_caption = self.active_caption + previous_caption.set_end(time_code) + + if clear_active_caption: + self.active_caption = None + + self.div.push_child(previous_caption.to_paragraph(self.div.get_doc())) + + def paint_on_active_caption(self, time_code: SmpteTimeCode): + """Initialize active caption for paint-on style""" + active_style = SccCaptionStyle.PaintOn + copied_lines = [] + cursor = self.active_cursor + + if self.has_active_caption(): + active_style = self.active_caption.get_caption_style() + cursor = self.active_caption.get_cursor() + + # Copy buffered lines + copied_lines = self.active_caption.copy_lines() + + # Push active to model if there is one + self.push_active_caption_to_model(time_code) + + # Initialize new buffered caption + self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, active_style) + self.initialize_active_caption(time_code) + + if len(copied_lines) > 0: + # Set remaining lines to the new buffered caption + self.active_caption.set_lines(copied_lines) + + self.active_caption.set_cursor_at(cursor[0], cursor[1]) + + def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code: SmpteTimeCode): + """Processes SCC Preamble Address Code it to the map to model""" + + pac_row = pac.get_row() + pac_indent = pac.get_indent() + + if self.current_style is SccCaptionStyle.PaintOn: + + self.paint_on_active_caption(time_code) + + if self.active_caption.get_caption_style() is SccCaptionStyle.PaintOn: + # Clear target row on Paint-On style + target_row = self.active_caption.get_lines().get(pac_row) + if target_row is not None: + target_row.clear() + + self.active_caption.set_cursor_at(pac_row, pac_indent) + + if self.active_caption.get_current_text() is None: + self.active_caption.new_caption_text() + + elif self.current_style is SccCaptionStyle.RollUp: + + if not self.has_active_caption(): + # If there is no current active caption, initialize an empty new paragraph + self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.RollUp) + self.initialize_active_caption(time_code) + + # Ignore PACs for rows 5-11, but get indent from PACs for rows 1-4 and 12-15. (Roll-Up) + if pac_row in range(5, 12): + self.active_caption.set_cursor_at(ROLL_UP_BASE_ROW) + self.active_caption.new_caption_text() + return + + # Force roll-up paragraph to belong to the same region + self.active_caption.set_cursor_at(ROLL_UP_BASE_ROW, pac_indent) + + self.active_caption.new_caption_text() + + else: # Pop-On Style + + if self.buffered_caption is None: + self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.PopOn) + + # set cursor in paragraph and create line or text if necessary + self.buffered_caption.set_cursor_at(pac_row, pac_indent) + + self.buffered_caption.new_caption_text() + + self.current_color = pac.get_color() + self.current_font_style = pac.get_font_style() + self.current_text_decoration = pac.get_text_decoration() + + if self.has_active_caption(): + self.active_cursor = self.active_caption.get_cursor() + + def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTimeCode): + """Processes SCC Mid-Row Code to map it to the model""" + + # If the Paint-On or Roll-Up style is activated, write directly on active caption + processed_caption = self.buffered_caption + if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): + processed_caption = self.active_caption + + if processed_caption is None: + raise ValueError("No current SCC caption initialized") + + color = mid_row_code.get_color() + font_style = mid_row_code.get_font_style() + text_decoration = mid_row_code.get_text_decoration() + + if self.previous_word_type is not SccMidRowCode: + # In case of multiple mid-row codes, move right only after the first code + + # If there is already text on the current line + if processed_caption.get_current_text() is not None \ + and processed_caption.get_current_text().get_text() != "": + + # In case of paint-on replacing text + if self.current_style is SccCaptionStyle.PaintOn \ + and processed_caption.get_current_line().get_cursor() < processed_caption.get_current_line().get_length(): + processed_caption.append_text(" ") + + else: + if text_decoration is None: + processed_caption.new_caption_text() + processed_caption.append_text(" ") + else: + processed_caption.append_text(" ") + processed_caption.new_caption_text() + + else: + processed_caption.append_text(" ") + + self.current_color = color + self.current_font_style = font_style + self.current_text_decoration = text_decoration + + else: + if color is not None: + self.current_color = color + if font_style is not None: + self.current_font_style = font_style + if text_decoration is not None: + self.current_text_decoration = text_decoration + + processed_caption.append_text(" ") + processed_caption.new_caption_text() + + if processed_caption.get_caption_style() is SccCaptionStyle.PaintOn: + processed_caption.get_current_text().set_begin(time_code) + + def process_attribute_code(self, attribute_code: SccAttributeCode): + """Processes SCC Attribute Code to map it to the model""" + + # If the Paint-On or Roll-Up style is activated, write directly on active caption + processed_caption = self.buffered_caption + if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): + processed_caption = self.active_caption + + if processed_caption is None or processed_caption.get_current_text() is None: + raise ValueError("No current SCC caption nor content initialized") + + if processed_caption.get_current_text() is not None and processed_caption.get_current_text().get_text(): + processed_caption.new_caption_text() + + if attribute_code.is_background(): + processed_caption.get_current_text().add_style_property(StyleProperties.BackgroundColor, attribute_code.get_color()) + else: + processed_caption.get_current_text().add_style_property(StyleProperties.Color, attribute_code.get_color()) + + processed_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, + attribute_code.get_text_decoration()) + + def process_control_code(self, control_code: SccControlCode, time_code: SmpteTimeCode): + """Processes SCC Control Code to map it to the model""" + + processed_caption = self.buffered_caption + + if control_code is SccControlCode.RCL: + # Start a new Pop-On caption + self.current_style = SccCaptionStyle.PopOn + + elif control_code is SccControlCode.RDC: + # Start a new Paint-On caption + self.current_style = SccCaptionStyle.PaintOn + + elif control_code in (SccControlCode.RU2, SccControlCode.RU3, SccControlCode.RU4): + # Start a new Roll-Up caption + self.current_style = SccCaptionStyle.RollUp + + if control_code is SccControlCode.RU2: + self.roll_up_depth = 2 + + elif control_code is SccControlCode.RU3: + self.roll_up_depth = 3 + + elif control_code is SccControlCode.RU4: + self.roll_up_depth = 4 + + else: + # If the Paint-On or Roll-Up style is activated, write directly on active caption + if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): + processed_caption = self.active_caption + + if control_code is SccControlCode.EOC: + # Display caption (Pop-On) + self.set_buffered_caption_begin_time(time_code) + self.flip_buffered_to_active_captions(time_code) + + if self.has_active_caption(): + # Set text alignment + if self.text_alignment == TextAlignment.AUTO: + text_alignment = self.active_caption.guess_text_alignment() + else: + text_alignment = self.text_alignment.text_align + + # Apply text alignment + self.active_caption.add_style_property(StyleProperties.TextAlign, text_alignment) + + elif control_code is SccControlCode.EDM: + # Erase displayed captions + if self.has_active_caption(): + if time_code is not None: + # End time is exclusive in the model, set it to the next frame + end_time_code = copy.copy(time_code) + end_time_code.add_frames() + else: + end_time_code = time_code + + self.push_active_caption_to_model(end_time_code) + + elif control_code is SccControlCode.ENM: + # Erase buffered caption + self.buffered_caption = None + + elif control_code is SccControlCode.TO1: + processed_caption.indent_cursor(1) + + elif control_code is SccControlCode.TO2: + processed_caption.indent_cursor(2) + + elif control_code is SccControlCode.TO3: + processed_caption.indent_cursor(3) + + elif control_code is SccControlCode.CR: + # Roll the displayed caption up one row (Roll-Up) + + if self.has_active_caption(): + # Push active caption to model (but don't erase it) + self.push_active_caption_to_model(time_code, False) + # Roll the active caption up + self.active_caption.roll_up() + # Get the remaining lines to initialize the following caption with the expected depth + previous_lines = self.active_caption.get_last_caption_lines(self.roll_up_depth - 1) + + # Initialize the new caption with the previous lines + self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.RollUp) + self.initialize_active_caption(time_code) + self.active_caption.set_lines(previous_lines) + + self.active_caption.set_cursor_at(self.active_cursor[0], self.active_cursor[1]) + + elif control_code is SccControlCode.DER: + # Delete to End of Row (Paint-On) + # The DER may be issued from any point on a row to delete all displayable characters, transparent + # spaces, and mid-row codes from (and including) the current cell to the end of the row. + # Not used in this implementation since this SCC reader does not map the text overlapping into + # the model (i.e. a row is erased when a PAC is received, so before a new caption is written onto it). + pass + + elif control_code is SccControlCode.BS: + # Backspace + # When a Backspace is received, the cursor moves to the left one column position erasing + # the character or Mid-Row Code occupying that location, unless the cursor is in Column 1 + processed_caption.get_current_text().backspace() + + def process_text(self, word: str, time_code: SmpteTimeCode): + """Processes SCC text words""" + if self.current_style is SccCaptionStyle.PaintOn: + if word.startswith(" "): + + if self.active_caption.get_caption_style() is not SccCaptionStyle.PaintOn: + self.paint_on_active_caption(time_code) + self.active_caption.append_text(word) + + else: + self.active_caption.new_caption_text() + self.active_caption.append_text(word) + self.active_caption.get_current_text().set_begin(time_code) + + + elif word.endswith(" "): + self.active_caption.append_text(word) + + if self.active_caption.get_caption_style() is not SccCaptionStyle.PaintOn: + self.paint_on_active_caption(time_code) + else: + self.active_caption.new_caption_text() + self.active_caption.get_current_text().set_begin(time_code) + + else: + if not self.has_active_caption(): + self.paint_on_active_caption(time_code) + + self.active_caption.append_text(word) + + self.active_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color) + self.active_caption.get_current_text().add_style_property(StyleProperties.FontStyle, self.current_font_style) + self.active_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration) + + elif self.current_style is SccCaptionStyle.RollUp: + self.active_caption.append_text(word) + + self.active_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color) + self.active_caption.get_current_text().add_style_property(StyleProperties.FontStyle, self.current_font_style) + self.active_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration) + + else: + self.buffered_caption.append_text(word) + + self.buffered_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color) + self.buffered_caption.get_current_text().add_style_property(StyleProperties.FontStyle, self.current_font_style) + self.buffered_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration) + + if self.has_active_caption(): + self.active_cursor = self.active_caption.get_cursor() + + def flush(self, time_code: Optional[SmpteTimeCode] = None): + """Flushes the remaining current caption""" + if self.has_active_caption(): + self.push_active_caption_to_model(time_code) + + if self.buffered_caption is not None: + # Remove the buffered caption + self.buffered_caption = None diff --git a/src/main/python/ttconv/scc/line.py b/src/main/python/ttconv/scc/line.py index 2b65cd17..e9dcd56d 100644 --- a/src/main/python/ttconv/scc/line.py +++ b/src/main/python/ttconv/scc/line.py @@ -31,13 +31,14 @@ import re from typing import List, Optional +from ttconv.scc.caption_style import SccCaptionStyle from ttconv.scc.codes.attribute_codes import SccAttributeCode from ttconv.scc.codes.control_codes import SccControlCode from ttconv.scc.codes.mid_row_codes import SccMidRowCode from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode from ttconv.scc.codes.special_characters import SccSpecialCharacter, SccExtendedCharacter +from ttconv.scc.context import SccContext from ttconv.scc.disassembly import get_color_disassembly, get_font_style_disassembly, get_text_decoration_disassembly -from ttconv.scc.caption_style import SccCaptionStyle from ttconv.scc.word import SccWord from ttconv.time_code import SmpteTimeCode, FPS_30 @@ -156,3 +157,84 @@ def to_disassembly(self) -> str: disassembly_line += scc_word.to_text() return disassembly_line + + def process(self, context: SccContext) -> SmpteTimeCode: + """Converts the SCC line to the data model""" + + debug = str(self.time_code) + "\t" + + for scc_word in self.scc_words: + + if context.previous_word is not None and context.previous_word.value == scc_word.value and context.previous_word.is_code(): + context.previous_word = None + continue + + self.time_code.add_frames() + + if scc_word.value == 0x0000: + continue + + if scc_word.byte_1 < 0x20: + + scc_code = scc_word.get_code() + + if isinstance(scc_code, SccPreambleAddressCode): + debug += "[PAC|" + str(scc_code.get_row()) + "|" + str(scc_code.get_indent()) + if scc_code.get_color() is not None: + debug += "|" + str(scc_code.get_color()) + if scc_code.get_font_style() is not None: + debug += "|I" + if scc_code.get_text_decoration() is not None: + debug += "|U" + debug += "/" + hex(scc_word.value) + "]" + context.process_preamble_address_code(scc_code, self.time_code) + context.previous_word_type = type(scc_code) + + elif isinstance(scc_code, SccAttributeCode): + debug += "[ATC/" + hex(scc_word.value) + "]" + context.process_attribute_code(scc_code) + context.previous_word_type = type(scc_code) + + elif isinstance(scc_code, SccMidRowCode): + debug += "[MRC|" + scc_code.get_name() + "/" + hex(scc_word.value) + "]" + context.process_mid_row_code(scc_code, self.time_code) + context.previous_word_type = type(scc_code) + + elif isinstance(scc_code, SccControlCode): + debug += "[CC|" + scc_code.get_name() + "/" + hex(scc_word.value) + "]" + context.process_control_code(scc_code, self.time_code) + context.previous_word_type = type(scc_code) + + elif isinstance(scc_code, SccSpecialCharacter): + word = scc_code.get_unicode_value() + debug += word + context.process_text(word, self.time_code) + context.previous_word_type = type(scc_code) + + elif isinstance(scc_code, SccExtendedCharacter): + if context.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): + context.active_caption.get_current_text().backspace() + else: + context.buffered_caption.get_current_text().backspace() + + word = scc_code.get_unicode_value() + debug += word + context.process_text(word, self.time_code) + context.previous_word_type = type(scc_code) + + else: + debug += "[??/" + hex(scc_word.value) + "]" + LOGGER.warning("Unsupported SCC word: %s", hex(scc_word.value)) + context.previous_word_type = None + + else: + text = scc_word.to_text() + debug += text + context.process_text(text, self.time_code) + context.previous_word_type = str + + context.previous_word = scc_word + + LOGGER.debug(debug) + + return self.time_code diff --git a/src/main/python/ttconv/scc/reader.py b/src/main/python/ttconv/scc/reader.py index 3f183af7..3ddb4d12 100644 --- a/src/main/python/ttconv/scc/reader.py +++ b/src/main/python/ttconv/scc/reader.py @@ -27,536 +27,19 @@ from __future__ import annotations -import copy import logging -from typing import Optional, Tuple, Type +from typing import Optional from ttconv.model import ContentDocument, Body, Div, CellResolutionType, ActiveAreaType -from ttconv.scc.codes.attribute_codes import SccAttributeCode -from ttconv.scc.codes.control_codes import SccControlCode -from ttconv.scc.codes.mid_row_codes import SccMidRowCode -from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode -from ttconv.scc.codes.special_characters import SccSpecialCharacter, SccExtendedCharacter -from ttconv.scc.config import SccReaderConfiguration, TextAlignment -from ttconv.scc.line import SccLine -from ttconv.scc.caption_paragraph import SccCaptionParagraph, SCC_SAFE_AREA_CELL_RESOLUTION_ROWS, \ +from ttconv.scc.caption_paragraph import SCC_SAFE_AREA_CELL_RESOLUTION_ROWS, \ SCC_SAFE_AREA_CELL_RESOLUTION_COLUMNS, SCC_ROOT_CELL_RESOLUTION_ROWS, SCC_ROOT_CELL_RESOLUTION_COLUMNS -from ttconv.scc.caption_style import SccCaptionStyle -from ttconv.scc.word import SccWord +from ttconv.scc.config import SccReaderConfiguration +from ttconv.scc.context import SccContext +from ttconv.scc.line import SccLine from ttconv.style_properties import StyleProperties, LengthType, GenericFontFamilyType -from ttconv.time_code import SmpteTimeCode LOGGER = logging.getLogger(__name__) -ROLL_UP_BASE_ROW = 15 - - -class _SccContext: - def __init__(self, config: Optional[SccReaderConfiguration] = None): - # Caption paragraphs container - self.div: Optional[Div] = None - - # Caption paragraphs counter - self.count: int = 0 - - # Screen safe area offsets - self.safe_area_x_offset: int = 0 - self.safe_area_y_offset: int = 0 - - # Previously read SCC word value - self.previous_word: Optional[SccWord] = None - self.previous_word_type: Optional[Type] = None - - # Buffered caption being built - self.buffered_caption: Optional[SccCaptionParagraph] = None - # Captions being displayed - self.active_caption: Optional[SccCaptionParagraph] = None - # Caption style (Pop-on, Roll-up, Paint-on) currently processed - self.current_style: Optional[SccCaptionStyle] = None - - # Roll-up caption number of lines - self.roll_up_depth: int = 0 - - # Cursor position in the active area - self.active_cursor: Tuple[int, int] = (0, 0) - - self.current_text_decoration = None - self.current_color = None - self.current_font_style = None - - # Text alignment - self.text_alignment = TextAlignment.AUTO if config is None else config.text_align - - def set_safe_area(self, safe_area_x_offset: int, safe_area_y_offset: int): - """Sets the safe area""" - self.safe_area_x_offset = safe_area_x_offset - self.safe_area_y_offset = safe_area_y_offset - - def has_active_caption(self) -> bool: - """Returns whether captions are being displayed or not""" - return self.active_caption is not None - - def set_buffered_caption_begin_time(self, time_code: SmpteTimeCode): - """Initializes the current buffered caption with begin time""" - if self.buffered_caption is not None: - self.buffered_caption.set_begin(time_code) - - def initialize_active_caption(self, begin_time_code: SmpteTimeCode): - """Initializes the current active caption with id and begin time""" - if self.active_caption is not None: - if not self.active_caption.get_id(): - self.count += 1 - self.active_caption.set_id("caption" + str(self.count)) - - self.active_caption.set_begin(begin_time_code) - - def push_buffered_to_active_captions(self): - """Send the current buffered caption to the active captions list""" - if self.buffered_caption is not None and self.buffered_caption.get_current_text(): - if not self.buffered_caption.get_id(): - self.count += 1 - self.buffered_caption.set_id("caption" + str(self.count)) - - self.active_caption = self.buffered_caption - self.buffered_caption = None - - def flip_buffered_to_active_captions(self, time_code: Optional[SmpteTimeCode] = None): - """ - Flip the current buffered caption with the last active captions list, - and push to model if an end time code is specified. - """ - temporary_caption = None - - if self.has_active_caption(): - temporary_caption = self.active_caption - - if time_code is not None: - # End of display of active captions - if self.has_active_caption(): - self.push_active_caption_to_model(time_code) - - self.push_buffered_to_active_captions() - - if temporary_caption is not None: - self.buffered_caption = temporary_caption - - def push_active_caption_to_model(self, time_code: SmpteTimeCode, clear_active_caption: bool = True): - """Sets end time to the last active caption, and pushes it into the data model""" - if self.has_active_caption(): - self.active_cursor = self.active_caption.get_cursor() - - previous_caption = self.active_caption - previous_caption.set_end(time_code) - - if clear_active_caption: - self.active_caption = None - - self.div.push_child(previous_caption.to_paragraph(self.div.get_doc())) - - def paint_on_active_caption(self, time_code: SmpteTimeCode): - """Initialize active caption for paint-on style""" - active_style = SccCaptionStyle.PaintOn - copied_lines = [] - cursor = self.active_cursor - - if self.has_active_caption(): - active_style = self.active_caption.get_caption_style() - cursor = self.active_caption.get_cursor() - - # Copy buffered lines - copied_lines = self.active_caption.copy_lines() - - # Push active to model if there is one - self.push_active_caption_to_model(time_code) - - # Initialize new buffered caption - self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, active_style) - self.initialize_active_caption(time_code) - - if len(copied_lines) > 0: - # Set remaining lines to the new buffered caption - self.active_caption.set_lines(copied_lines) - - self.active_caption.set_cursor_at(cursor[0], cursor[1]) - - def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code: SmpteTimeCode): - """Processes SCC Preamble Address Code it to the map to model""" - - pac_row = pac.get_row() - pac_indent = pac.get_indent() - - if self.current_style is SccCaptionStyle.PaintOn: - - self.paint_on_active_caption(time_code) - - if self.active_caption.get_caption_style() is SccCaptionStyle.PaintOn: - # Clear target row on Paint-On style - target_row = self.active_caption.get_lines().get(pac_row) - if target_row is not None: - target_row.clear() - - self.active_caption.set_cursor_at(pac_row, pac_indent) - - if self.active_caption.get_current_text() is None: - self.active_caption.new_caption_text() - - elif self.current_style is SccCaptionStyle.RollUp: - - if not self.has_active_caption(): - # If there is no current active caption, initialize an empty new paragraph - self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.RollUp) - self.initialize_active_caption(time_code) - - # Ignore PACs for rows 5-11, but get indent from PACs for rows 1-4 and 12-15. (Roll-Up) - if pac_row in range(5, 12): - self.active_caption.set_cursor_at(ROLL_UP_BASE_ROW) - self.active_caption.new_caption_text() - return - - # Force roll-up paragraph to belong to the same region - self.active_caption.set_cursor_at(ROLL_UP_BASE_ROW, pac_indent) - - self.active_caption.new_caption_text() - - else: # Pop-On Style - - if self.buffered_caption is None: - self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.PopOn) - - # set cursor in paragraph and create line or text if necessary - self.buffered_caption.set_cursor_at(pac_row, pac_indent) - - self.buffered_caption.new_caption_text() - - self.current_color = pac.get_color() - self.current_font_style = pac.get_font_style() - self.current_text_decoration = pac.get_text_decoration() - - if self.has_active_caption(): - self.active_cursor = self.active_caption.get_cursor() - - def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTimeCode): - """Processes SCC Mid-Row Code to map it to the model""" - - # If the Paint-On or Roll-Up style is activated, write directly on active caption - processed_caption = self.buffered_caption - if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): - processed_caption = self.active_caption - - if processed_caption is None: - raise ValueError("No current SCC caption initialized") - - color = mid_row_code.get_color() - font_style = mid_row_code.get_font_style() - text_decoration = mid_row_code.get_text_decoration() - - if self.previous_word_type is not SccMidRowCode: - # In case of multiple mid-row codes, move right only after the first code - - # If there is already text on the current line - if processed_caption.get_current_text() is not None \ - and processed_caption.get_current_text().get_text() != "": - - # In case of paint-on replacing text - if self.current_style is SccCaptionStyle.PaintOn \ - and processed_caption.get_current_line().get_cursor() < processed_caption.get_current_line().get_length(): - processed_caption.append_text(" ") - - else: - if text_decoration is None: - processed_caption.new_caption_text() - processed_caption.append_text(" ") - else: - processed_caption.append_text(" ") - processed_caption.new_caption_text() - - else: - processed_caption.append_text(" ") - - self.current_color = color - self.current_font_style = font_style - self.current_text_decoration = text_decoration - - else: - if color is not None: - self.current_color = color - if font_style is not None: - self.current_font_style = font_style - if text_decoration is not None: - self.current_text_decoration = text_decoration - - processed_caption.append_text(" ") - processed_caption.new_caption_text() - - if processed_caption.get_caption_style() is SccCaptionStyle.PaintOn: - processed_caption.get_current_text().set_begin(time_code) - - def process_attribute_code(self, attribute_code: SccAttributeCode): - """Processes SCC Attribute Code to map it to the model""" - - # If the Paint-On or Roll-Up style is activated, write directly on active caption - processed_caption = self.buffered_caption - if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): - processed_caption = self.active_caption - - if processed_caption is None or processed_caption.get_current_text() is None: - raise ValueError("No current SCC caption nor content initialized") - - if processed_caption.get_current_text() is not None and processed_caption.get_current_text().get_text(): - processed_caption.new_caption_text() - - if attribute_code.is_background(): - processed_caption.get_current_text().add_style_property(StyleProperties.BackgroundColor, attribute_code.get_color()) - else: - processed_caption.get_current_text().add_style_property(StyleProperties.Color, attribute_code.get_color()) - - processed_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, - attribute_code.get_text_decoration()) - - def process_control_code(self, control_code: SccControlCode, time_code: SmpteTimeCode): - """Processes SCC Control Code to map it to the model""" - - processed_caption = self.buffered_caption - - if control_code is SccControlCode.RCL: - # Start a new Pop-On caption - self.current_style = SccCaptionStyle.PopOn - - elif control_code is SccControlCode.RDC: - # Start a new Paint-On caption - self.current_style = SccCaptionStyle.PaintOn - - elif control_code in (SccControlCode.RU2, SccControlCode.RU3, SccControlCode.RU4): - # Start a new Roll-Up caption - self.current_style = SccCaptionStyle.RollUp - - if control_code is SccControlCode.RU2: - self.roll_up_depth = 2 - - elif control_code is SccControlCode.RU3: - self.roll_up_depth = 3 - - elif control_code is SccControlCode.RU4: - self.roll_up_depth = 4 - - else: - # If the Paint-On or Roll-Up style is activated, write directly on active caption - if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): - processed_caption = self.active_caption - - if control_code is SccControlCode.EOC: - # Display caption (Pop-On) - self.set_buffered_caption_begin_time(time_code) - self.flip_buffered_to_active_captions(time_code) - - if self.has_active_caption(): - # Set text alignment - if self.text_alignment == TextAlignment.AUTO: - text_alignment = self.active_caption.guess_text_alignment() - else: - text_alignment = self.text_alignment.text_align - - # Apply text alignment - self.active_caption.add_style_property(StyleProperties.TextAlign, text_alignment) - - elif control_code is SccControlCode.EDM: - # Erase displayed captions - if self.has_active_caption(): - if time_code is not None: - # End time is exclusive in the model, set it to the next frame - end_time_code = copy.copy(time_code) - end_time_code.add_frames() - else: - end_time_code = time_code - - self.push_active_caption_to_model(end_time_code) - - elif control_code is SccControlCode.ENM: - # Erase buffered caption - self.buffered_caption = None - - elif control_code is SccControlCode.TO1: - processed_caption.indent_cursor(1) - - elif control_code is SccControlCode.TO2: - processed_caption.indent_cursor(2) - - elif control_code is SccControlCode.TO3: - processed_caption.indent_cursor(3) - - elif control_code is SccControlCode.CR: - # Roll the displayed caption up one row (Roll-Up) - - if self.has_active_caption(): - # Push active caption to model (but don't erase it) - self.push_active_caption_to_model(time_code, False) - # Roll the active caption up - self.active_caption.roll_up() - # Get the remaining lines to initialize the following caption with the expected depth - previous_lines = self.active_caption.get_last_caption_lines(self.roll_up_depth - 1) - - # Initialize the new caption with the previous lines - self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.RollUp) - self.initialize_active_caption(time_code) - self.active_caption.set_lines(previous_lines) - - self.active_caption.set_cursor_at(self.active_cursor[0], self.active_cursor[1]) - - elif control_code is SccControlCode.DER: - # Delete to End of Row (Paint-On) - # The DER may be issued from any point on a row to delete all displayable characters, transparent - # spaces, and mid-row codes from (and including) the current cell to the end of the row. - # Not used in this implementation since this SCC reader does not map the text overlapping into - # the model (i.e. a row is erased when a PAC is received, so before a new caption is written onto it). - pass - - elif control_code is SccControlCode.BS: - # Backspace - # When a Backspace is received, the cursor moves to the left one column position erasing - # the character or Mid-Row Code occupying that location, unless the cursor is in Column 1 - processed_caption.get_current_text().backspace() - - def process_text(self, word: str, time_code: SmpteTimeCode): - """Processes SCC text words""" - if self.current_style is SccCaptionStyle.PaintOn: - if word.startswith(" "): - - if self.active_caption.get_caption_style() is not SccCaptionStyle.PaintOn: - self.paint_on_active_caption(time_code) - self.active_caption.append_text(word) - - else: - self.active_caption.new_caption_text() - self.active_caption.append_text(word) - self.active_caption.get_current_text().set_begin(time_code) - - - elif word.endswith(" "): - self.active_caption.append_text(word) - - if self.active_caption.get_caption_style() is not SccCaptionStyle.PaintOn: - self.paint_on_active_caption(time_code) - else: - self.active_caption.new_caption_text() - self.active_caption.get_current_text().set_begin(time_code) - - else: - if not self.has_active_caption(): - self.paint_on_active_caption(time_code) - - self.active_caption.append_text(word) - - self.active_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color) - self.active_caption.get_current_text().add_style_property(StyleProperties.FontStyle, self.current_font_style) - self.active_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration) - - elif self.current_style is SccCaptionStyle.RollUp: - self.active_caption.append_text(word) - - self.active_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color) - self.active_caption.get_current_text().add_style_property(StyleProperties.FontStyle, self.current_font_style) - self.active_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration) - - else: - self.buffered_caption.append_text(word) - - self.buffered_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color) - self.buffered_caption.get_current_text().add_style_property(StyleProperties.FontStyle, self.current_font_style) - self.buffered_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration) - - if self.has_active_caption(): - self.active_cursor = self.active_caption.get_cursor() - - def flush(self, time_code: Optional[SmpteTimeCode] = None): - """Flushes the remaining current caption""" - if self.has_active_caption(): - self.push_active_caption_to_model(time_code) - - if self.buffered_caption is not None: - # Remove the buffered caption - self.buffered_caption = None - - def process_line(self, line: SccLine) -> SmpteTimeCode: - """Converts the SCC line to the data model""" - - debug = str(line.time_code) + "\t" - - for scc_word in line.scc_words: - - if self.previous_word is not None and self.previous_word.value == scc_word.value and self.previous_word.is_code(): - self.previous_word = None - continue - - line.time_code.add_frames() - - if scc_word.value == 0x0000: - continue - - if scc_word.byte_1 < 0x20: - - scc_code = scc_word.get_code() - - if isinstance(scc_code, SccPreambleAddressCode): - debug += "[PAC|" + str(scc_code.get_row()) + "|" + str(scc_code.get_indent()) - if scc_code.get_color() is not None: - debug += "|" + str(scc_code.get_color()) - if scc_code.get_font_style() is not None: - debug += "|I" - if scc_code.get_text_decoration() is not None: - debug += "|U" - debug += "/" + hex(scc_word.value) + "]" - self.process_preamble_address_code(scc_code, line.time_code) - self.previous_word_type = type(scc_code) - - elif isinstance(scc_code, SccAttributeCode): - debug += "[ATC/" + hex(scc_word.value) + "]" - self.process_attribute_code(scc_code) - self.previous_word_type = type(scc_code) - - elif isinstance(scc_code, SccMidRowCode): - debug += "[MRC|" + scc_code.get_name() + "/" + hex(scc_word.value) + "]" - self.process_mid_row_code(scc_code, line.time_code) - self.previous_word_type = type(scc_code) - - elif isinstance(scc_code, SccControlCode): - debug += "[CC|" + scc_code.get_name() + "/" + hex(scc_word.value) + "]" - self.process_control_code(scc_code, line.time_code) - self.previous_word_type = type(scc_code) - - elif isinstance(scc_code, SccSpecialCharacter): - word = scc_code.get_unicode_value() - debug += word - self.process_text(word, line.time_code) - self.previous_word_type = type(scc_code) - - elif isinstance(scc_code, SccExtendedCharacter): - if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): - self.active_caption.get_current_text().backspace() - else: - self.buffered_caption.get_current_text().backspace() - - word = scc_code.get_unicode_value() - debug += word - self.process_text(word, line.time_code) - self.previous_word_type = type(scc_code) - - else: - debug += "[??/" + hex(scc_word.value) + "]" - LOGGER.warning("Unsupported SCC word: %s", hex(scc_word.value)) - self.previous_word_type = None - - else: - text = scc_word.to_text() - debug += text - self.process_text(text, line.time_code) - self.previous_word_type = str - - self.previous_word = scc_word - - LOGGER.debug(debug) - - return line.time_code - # # SCC reader @@ -565,7 +48,7 @@ def process_line(self, line: SccLine) -> SmpteTimeCode: def to_model(scc_content: str, config: Optional[SccReaderConfiguration] = None, progress_callback=lambda _: None): """Converts a SCC document to the data model""" - context = _SccContext(config) + context = SccContext(config) document = ContentDocument() # Safe area must be a 32x15 grid, that represents 80% of the root area @@ -613,7 +96,7 @@ def to_model(scc_content: str, config: Optional[SccReaderConfiguration] = None, if scc_line is None: continue - context.process_line(scc_line) + scc_line.process(context) context.flush() From abd2c587eca3caa9f3cbe274ceee13c99c9ffbe1 Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Thu, 10 Aug 2023 12:07:12 +0200 Subject: [PATCH 06/13] SCC: codes and characters refactoring --- .../ttconv/scc/codes/attribute_codes.py | 4 + .../python/ttconv/scc/codes/control_codes.py | 4 + .../ttconv/scc/codes/extended_characters.py | 128 ++++ .../python/ttconv/scc/codes/mid_row_codes.py | 4 + .../scc/codes/preambles_address_codes.py | 12 + .../ttconv/scc/codes/special_characters.py | 98 +-- src/main/python/ttconv/scc/line.py | 18 +- src/main/python/ttconv/scc/word.py | 3 +- .../python/test_scc_extended_characters.py | 557 ++++++++++++++++++ .../python/test_scc_special_characters.py | 525 +---------------- src/test/python/test_scc_word.py | 42 +- 11 files changed, 743 insertions(+), 652 deletions(-) create mode 100644 src/main/python/ttconv/scc/codes/extended_characters.py create mode 100644 src/test/python/test_scc_extended_characters.py diff --git a/src/main/python/ttconv/scc/codes/attribute_codes.py b/src/main/python/ttconv/scc/codes/attribute_codes.py index d724e4d9..738082d3 100644 --- a/src/main/python/ttconv/scc/codes/attribute_codes.py +++ b/src/main/python/ttconv/scc/codes/attribute_codes.py @@ -85,3 +85,7 @@ def find(value: int) -> Optional[SccAttributeCode]: if attribute_code.contains_value(value): return attribute_code return None + + def debug(self, value: int) -> str: + """Debug representation of the code""" + return "[ATC|" + self.get_name() + "/" + hex(value) + "]" diff --git a/src/main/python/ttconv/scc/codes/control_codes.py b/src/main/python/ttconv/scc/codes/control_codes.py index b815086c..ebdc93de 100644 --- a/src/main/python/ttconv/scc/codes/control_codes.py +++ b/src/main/python/ttconv/scc/codes/control_codes.py @@ -74,3 +74,7 @@ def find(value: int) -> typing.Optional[SccControlCode]: if control_code.contains_value(value): return control_code return None + + def debug(self, value: int) -> str: + """Debug representation of the code""" + return "[CC|" + self.get_name() + "/" + hex(value) + "]" diff --git a/src/main/python/ttconv/scc/codes/extended_characters.py b/src/main/python/ttconv/scc/codes/extended_characters.py new file mode 100644 index 00000000..b4fd7614 --- /dev/null +++ b/src/main/python/ttconv/scc/codes/extended_characters.py @@ -0,0 +1,128 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +# Copyright (c) 2020, Sandflow Consulting LLC +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""SCC Extended characters""" + +from __future__ import annotations + +import typing + +from ttconv.scc.codes import SccCode + + +class SccExtendedCharacter(SccCode): + """SCC Extended character definition""" + + # Spanish extended characters + LATIN_CAPITAL_LETTER_A_WITH_ACUTE = (0x1220, 0x1A20, '\u00C1') # Á capital A with acute accent + LATIN_CAPITAL_LETTER_E_WITH_ACUTE = (0x1221, 0x1A21, '\u00C9') # É capital E with acute accent + LATIN_CAPITAL_LETTER_O_WITH_ACUTE = (0x1222, 0x1A22, '\u00D3') # Ó capital O with acute accent + LATIN_CAPITAL_LETTER_U_WITH_ACUTE = (0x1223, 0x1A23, '\u00DA') # Ú capital U with acute accent + LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS = (0x1224, 0x1A24, '\u00DC') # Ü capital U with diaeresis or umlaut + LATIN_SMALL_LETTER_U_WITH_DIAERESIS = (0x1225, 0x1A25, '\u00FC') # ü small u with diaeresis or umlaut + LEFT_SINGLE_QUOTATION_MARK = (0x1226, 0x1A26, '\u2018') # ‘ opening single quote + INVERTED_EXCLAMATION_MARK = (0x1227, 0x1A27, '\u00A1') # ¡ inverted exclamation mark + + # Miscellaneous extended characters + ASTERISK = (0x1228, 0x1A28, '\u002A') # * Asterisk + NEUTRAL_SINGLE_QUOTATION_MARK = (0x1229, 0x1A29, '\u0027') # ' plain single quote + BOX_DRAWINGS_HEAVY_HORIZONTAL = (0x122A, 0x1A2A, '\u2501') # — em dash + COPYRIGHT_SIGN = (0x122B, 0x1A2B, '\u00A9') # © Copyright + SERVICE_MARK = (0x122C, 0x1A2C, '\u2120') # SM Servicemark + BULLET = (0x122D, 0x1A2D, '\u2022') # ● round bullet + LEFT_DOUBLE_QUOTATION_MARK = (0x122E, 0x1A2E, '\u201C') # “ opening double quotes + RIGHT_DOUBLE_QUOTATION_MARK = (0x122F, 0x1A2F, '\u201D') # ” closing double quotes + + # French extended characters + LATIN_CAPITAL_LETTER_A_WITH_GRAVE = (0x1230, 0x1A30, '\u00C0') # À capital A with grave accent + LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX = (0x1231, 0x1A31, '\u00C2') # Â capital A with circumflex accent + LATIN_CAPITAL_LETTER_C_WITH_CEDILLA = (0x1232, 0x1A32, '\u00C7') # Ç capital C with cedilla + LATIN_CAPITAL_LETTER_E_WITH_GRAVE = (0x1233, 0x1A33, '\u00C8') # È capital E with grave accent + LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX = (0x1234, 0x1A34, '\u00CA') # Ê capital E with circumflex accent + LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS = (0x1235, 0x1A35, '\u00CB') # Ë capital E with diaeresis or umlaut mark + LATIN_SMALL_LETTER_E_WITH_DIAERESIS = (0x1236, 0x1A36, '\u00EB') # ë small e with diaeresis or umlaut mark + LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX = (0x1237, 0x1A37, '\u00CE') # Î capital I with circumflex accent + LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS = (0x1238, 0x1A38, '\u00CF') # Ï capital I with diaeresis or umlaut mark + LATIN_SMALL_LETTER_I_WITH_DIAERESIS = (0x1239, 0x1A39, '\u00EF') # ï small i with diaeresis or umlaut mark + LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX = (0x123A, 0x1A3A, '\u00D4') # Ô capital O with circumflex + LATIN_CAPITAL_LETTER_U_WITH_GRAVE = (0x123B, 0x1A3B, '\u00D9') # Ù capital U with grave accent + LATIN_SMALL_LETTER_U_WITH_GRAVE = (0x123C, 0x1A3C, '\u00F9') # ù small u with grave accent + LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX = (0x123D, 0x1A3D, '\u00DB') # Û capital U with circumflex accent + LEFT_POINTING_GUILLEMET = (0x123E, 0x1A3E, '\u00AB') # « opening guillemets + RIGHT_POINTING_GUILLEMET = (0x123F, 0x1A3F, '\u00BB') # » closing guillemets + + # Portuguese extended characters + LATIN_CAPITAL_LETTER_A_WITH_TILDE = (0x1320, 0x1B20, '\u00C3') # Ã capital A with tilde + LATIN_SMALL_LETTER_A_WITH_TILDE = (0x1321, 0x1B21, '\u00E3') # ã small a with tilde + LATIN_CAPITAL_LETTER_I_WITH_ACUTE = (0x1322, 0x1B22, '\u00CD') # Í capital I with acute accent + LATIN_CAPITAL_LETTER_I_WITH_GRAVE = (0x1323, 0x1B23, '\u00CC') # Ì capital I with grave accent + LATIN_SMALL_LETTER_I_WITH_GRAVE = (0x1324, 0x1B24, '\u00EC') # ì small i with grave accent + LATIN_CAPITAL_LETTER_O_WITH_GRAVE = (0x1325, 0x1B25, '\u00D2') # Ò capital O with grave accent + LATIN_SMALL_LETTER_O_WITH_GRAVE = (0x1326, 0x1B26, '\u00F2') # ò small o with grave accent + LATIN_CAPITAL_LETTER_O_WITH_TILDE = (0x1327, 0x1B27, '\u00D5') # Õ capital O with tilde + LATIN_SMALL_LETTER_O_WITH_TILDE = (0x1328, 0x1B28, '\u00F5') # õ small o with tilde + BRACE_OPENING = (0x1329, 0x1B29, '\u007B') # { opening brace + BRACE_CLOSING = (0x132A, 0x1B2A, '\u007D') # } closing brace + REVERSE_SOLIDUS = (0x132B, 0x1B2B, '\u005C') # \ backslash + LATIN_SMALL_LETTER_TURNED_V = (0x132C, 0x1B2C, '\u028C') # ^ caret + LOW_LINE = (0x132D, 0x1B2D, '\u005F') # _ Underbar + VERTICAL_LINE = (0x132E, 0x1B2E, '\u007C') # | pipe + TILDE = (0x132F, 0x1B2F, '\u007E') # ~ tilde + + # German extended characters + LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS = (0x1330, 0x1B30, '\u00C4') # Ä Capital A with diaeresis or umlaut mark + LATIN_SMALL_LETTER_A_WITH_DIAERESIS = (0x1331, 0x1B31, '\u00E4') # ä small a with diaeresis or umlaut mark + LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS = (0x1332, 0x1B32, '\u00D6') # Ö Capital O with diaeresis or umlaut mark + LATIN_SMALL_LETTER_O_WITH_DIAERESIS = (0x1333, 0x1B33, '\u00F6') # ö small o with diaeresis or umlaut mark + ESZETT = (0x1334, 0x1B34, '\u00DF') # ß eszett (mall sharp s) + YEN_SIGN = (0x1335, 0x1B35, '\u00A5') # ¥ yen + CURRENCY_SIGN = (0x1336, 0x1B36, '\u00A4') # ¤ non-specific currency sign + BOX_DRAWINGS_HEAVY_VERTICAL = (0x1337, 0x1B37, '\u2503') # | Vertical bar + + # Danish extended characters + LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE = (0x1338, 0x1B38, '\u00C5') # Å capital A with ring + LATIN_SMALL_LETTER_A_WITH_RING_ABOVE = (0x1339, 0x1B39, '\u00E5') # å small a with ring + LATIN_CAPITAL_LETTER_O_WITH_STROKE = (0x133A, 0x1B3A, '\u00D8') # Ø capital O with slash + LATIN_SMALL_LETTER_O_WITH_STROKE = (0x133B, 0x1B3B, '\u00F8') # ø small o with slash + BOX_DRAWINGS_HEAVY_DOWN_AND_RIGHT = (0x133C, 0x1B3C, '\u250F') # ⎡ upper left corner + BOX_DRAWINGS_HEAVY_DOWN_AND_LEFT = (0x133D, 0x1B3D, '\u2513') # ⎤ upper right corner + BOX_DRAWINGS_HEAVY_UP_AND_RIGHT = (0x133E, 0x1B3E, '\u2517') # ⎣ lower left corner + BOX_DRAWINGS_HEAVY_UP_AND_LEFT = (0x133F, 0x1B3F, '\u251B') # ⎦ lower right corner + + def __init__(self, channel_1: int, channel_2: int, unicode: chr): + super().__init__(channel_1, channel_2) + self._unicode = unicode + + def get_unicode_value(self) -> chr: + """Returns the special or extended character unicode value""" + return self._unicode + + @staticmethod + def find(value: int) -> typing.Optional[SccExtendedCharacter]: + """Find the special character corresponding to the specified value""" + for spec_char in list(SccExtendedCharacter): + if spec_char.contains_value(value): + return spec_char + return None diff --git a/src/main/python/ttconv/scc/codes/mid_row_codes.py b/src/main/python/ttconv/scc/codes/mid_row_codes.py index 789b3d07..2cc0f936 100644 --- a/src/main/python/ttconv/scc/codes/mid_row_codes.py +++ b/src/main/python/ttconv/scc/codes/mid_row_codes.py @@ -87,3 +87,7 @@ def find(value: int) -> typing.Optional[SccMidRowCode]: if mid_row_code.contains_value(value): return mid_row_code return None + + def debug(self, value: int) -> str: + """Debug representation of the code""" + return "[MRC|" + self.get_name() + "/" + hex(value) + "]" diff --git a/src/main/python/ttconv/scc/codes/preambles_address_codes.py b/src/main/python/ttconv/scc/codes/preambles_address_codes.py index 1c450084..c530322b 100644 --- a/src/main/python/ttconv/scc/codes/preambles_address_codes.py +++ b/src/main/python/ttconv/scc/codes/preambles_address_codes.py @@ -162,3 +162,15 @@ def _get_description_bits(byte_2: int) -> Optional[_SccPacDescriptionBits]: if byte_2 not in list(range(0x40, 0x80)): return None return _SccPacDescriptionBits(byte_2 & 0x1F) + + def debug(self, value: int) -> str: + """Debug representation of the code""" + debug = "[PAC|" + str(self.get_row()) + "|" + str(self.get_indent()) + if self.get_color() is not None: + debug += "|" + str(self.get_color()) + if self.get_font_style() is not None: + debug += "|I" + if self.get_text_decoration() is not None: + debug += "|U" + debug += "/" + hex(value) + "]" + return debug diff --git a/src/main/python/ttconv/scc/codes/special_characters.py b/src/main/python/ttconv/scc/codes/special_characters.py index 04b87b7c..74668de4 100644 --- a/src/main/python/ttconv/scc/codes/special_characters.py +++ b/src/main/python/ttconv/scc/codes/special_characters.py @@ -23,7 +23,7 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -"""SCC Special and Extended characters""" +"""SCC Special characters""" from __future__ import annotations @@ -68,99 +68,3 @@ def find(value: int) -> typing.Optional[SccSpecialCharacter]: if spec_char.contains_value(value): return spec_char return None - - -class SccExtendedCharacter(SccCode): - """SCC Extended character definition""" - - # Spanish extended characters - LATIN_CAPITAL_LETTER_A_WITH_ACUTE = (0x1220, 0x1A20, '\u00C1') # Á capital A with acute accent - LATIN_CAPITAL_LETTER_E_WITH_ACUTE = (0x1221, 0x1A21, '\u00C9') # É capital E with acute accent - LATIN_CAPITAL_LETTER_O_WITH_ACUTE = (0x1222, 0x1A22, '\u00D3') # Ó capital O with acute accent - LATIN_CAPITAL_LETTER_U_WITH_ACUTE = (0x1223, 0x1A23, '\u00DA') # Ú capital U with acute accent - LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS = (0x1224, 0x1A24, '\u00DC') # Ü capital U with diaeresis or umlaut - LATIN_SMALL_LETTER_U_WITH_DIAERESIS = (0x1225, 0x1A25, '\u00FC') # ü small u with diaeresis or umlaut - LEFT_SINGLE_QUOTATION_MARK = (0x1226, 0x1A26, '\u2018') # ‘ opening single quote - INVERTED_EXCLAMATION_MARK = (0x1227, 0x1A27, '\u00A1') # ¡ inverted exclamation mark - - # Miscellaneous extended characters - ASTERISK = (0x1228, 0x1A28, '\u002A') # * Asterisk - NEUTRAL_SINGLE_QUOTATION_MARK = (0x1229, 0x1A29, '\u0027') # ' plain single quote - BOX_DRAWINGS_HEAVY_HORIZONTAL = (0x122A, 0x1A2A, '\u2501') # — em dash - COPYRIGHT_SIGN = (0x122B, 0x1A2B, '\u00A9') # © Copyright - SERVICE_MARK = (0x122C, 0x1A2C, '\u2120') # SM Servicemark - BULLET = (0x122D, 0x1A2D, '\u2022') # ● round bullet - LEFT_DOUBLE_QUOTATION_MARK = (0x122E, 0x1A2E, '\u201C') # “ opening double quotes - RIGHT_DOUBLE_QUOTATION_MARK = (0x122F, 0x1A2F, '\u201D') # ” closing double quotes - - # French extended characters - LATIN_CAPITAL_LETTER_A_WITH_GRAVE = (0x1230, 0x1A30, '\u00C0') # À capital A with grave accent - LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX = (0x1231, 0x1A31, '\u00C2') # Â capital A with circumflex accent - LATIN_CAPITAL_LETTER_C_WITH_CEDILLA = (0x1232, 0x1A32, '\u00C7') # Ç capital C with cedilla - LATIN_CAPITAL_LETTER_E_WITH_GRAVE = (0x1233, 0x1A33, '\u00C8') # È capital E with grave accent - LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX = (0x1234, 0x1A34, '\u00CA') # Ê capital E with circumflex accent - LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS = (0x1235, 0x1A35, '\u00CB') # Ë capital E with diaeresis or umlaut mark - LATIN_SMALL_LETTER_E_WITH_DIAERESIS = (0x1236, 0x1A36, '\u00EB') # ë small e with diaeresis or umlaut mark - LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX = (0x1237, 0x1A37, '\u00CE') # Î capital I with circumflex accent - LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS = (0x1238, 0x1A38, '\u00CF') # Ï capital I with diaeresis or umlaut mark - LATIN_SMALL_LETTER_I_WITH_DIAERESIS = (0x1239, 0x1A39, '\u00EF') # ï small i with diaeresis or umlaut mark - LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX = (0x123A, 0x1A3A, '\u00D4') # Ô capital O with circumflex - LATIN_CAPITAL_LETTER_U_WITH_GRAVE = (0x123B, 0x1A3B, '\u00D9') # Ù capital U with grave accent - LATIN_SMALL_LETTER_U_WITH_GRAVE = (0x123C, 0x1A3C, '\u00F9') # ù small u with grave accent - LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX = (0x123D, 0x1A3D, '\u00DB') # Û capital U with circumflex accent - LEFT_POINTING_GUILLEMET = (0x123E, 0x1A3E, '\u00AB') # « opening guillemets - RIGHT_POINTING_GUILLEMET = (0x123F, 0x1A3F, '\u00BB') # » closing guillemets - - # Portuguese extended characters - LATIN_CAPITAL_LETTER_A_WITH_TILDE = (0x1320, 0x1B20, '\u00C3') # Ã capital A with tilde - LATIN_SMALL_LETTER_A_WITH_TILDE = (0x1321, 0x1B21, '\u00E3') # ã small a with tilde - LATIN_CAPITAL_LETTER_I_WITH_ACUTE = (0x1322, 0x1B22, '\u00CD') # Í capital I with acute accent - LATIN_CAPITAL_LETTER_I_WITH_GRAVE = (0x1323, 0x1B23, '\u00CC') # Ì capital I with grave accent - LATIN_SMALL_LETTER_I_WITH_GRAVE = (0x1324, 0x1B24, '\u00EC') # ì small i with grave accent - LATIN_CAPITAL_LETTER_O_WITH_GRAVE = (0x1325, 0x1B25, '\u00D2') # Ò capital O with grave accent - LATIN_SMALL_LETTER_O_WITH_GRAVE = (0x1326, 0x1B26, '\u00F2') # ò small o with grave accent - LATIN_CAPITAL_LETTER_O_WITH_TILDE = (0x1327, 0x1B27, '\u00D5') # Õ capital O with tilde - LATIN_SMALL_LETTER_O_WITH_TILDE = (0x1328, 0x1B28, '\u00F5') # õ small o with tilde - BRACE_OPENING = (0x1329, 0x1B29, '\u007B') # { opening brace - BRACE_CLOSING = (0x132A, 0x1B2A, '\u007D') # } closing brace - REVERSE_SOLIDUS = (0x132B, 0x1B2B, '\u005C') # \ backslash - LATIN_SMALL_LETTER_TURNED_V = (0x132C, 0x1B2C, '\u028C') # ^ caret - LOW_LINE = (0x132D, 0x1B2D, '\u005F') # _ Underbar - VERTICAL_LINE = (0x132E, 0x1B2E, '\u007C') # | pipe - TILDE = (0x132F, 0x1B2F, '\u007E') # ~ tilde - - # German extended characters - LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS = (0x1330, 0x1B30, '\u00C4') # Ä Capital A with diaeresis or umlaut mark - LATIN_SMALL_LETTER_A_WITH_DIAERESIS = (0x1331, 0x1B31, '\u00E4') # ä small a with diaeresis or umlaut mark - LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS = (0x1332, 0x1B32, '\u00D6') # Ö Capital O with diaeresis or umlaut mark - LATIN_SMALL_LETTER_O_WITH_DIAERESIS = (0x1333, 0x1B33, '\u00F6') # ö small o with diaeresis or umlaut mark - ESZETT = (0x1334, 0x1B34, '\u00DF') # ß eszett (mall sharp s) - YEN_SIGN = (0x1335, 0x1B35, '\u00A5') # ¥ yen - CURRENCY_SIGN = (0x1336, 0x1B36, '\u00A4') # ¤ non-specific currency sign - BOX_DRAWINGS_HEAVY_VERTICAL = (0x1337, 0x1B37, '\u2503') # | Vertical bar - - # Danish extended characters - LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE = (0x1338, 0x1B38, '\u00C5') # Å capital A with ring - LATIN_SMALL_LETTER_A_WITH_RING_ABOVE = (0x1339, 0x1B39, '\u00E5') # å small a with ring - LATIN_CAPITAL_LETTER_O_WITH_STROKE = (0x133A, 0x1B3A, '\u00D8') # Ø capital O with slash - LATIN_SMALL_LETTER_O_WITH_STROKE = (0x133B, 0x1B3B, '\u00F8') # ø small o with slash - BOX_DRAWINGS_HEAVY_DOWN_AND_RIGHT = (0x133C, 0x1B3C, '\u250F') # ⎡ upper left corner - BOX_DRAWINGS_HEAVY_DOWN_AND_LEFT = (0x133D, 0x1B3D, '\u2513') # ⎤ upper right corner - BOX_DRAWINGS_HEAVY_UP_AND_RIGHT = (0x133E, 0x1B3E, '\u2517') # ⎣ lower left corner - BOX_DRAWINGS_HEAVY_UP_AND_LEFT = (0x133F, 0x1B3F, '\u251B') # ⎦ lower right corner - - def __init__(self, channel_1: int, channel_2: int, unicode: chr): - super().__init__(channel_1, channel_2) - self._unicode = unicode - - def get_unicode_value(self) -> chr: - """Returns the special or extended character unicode value""" - return self._unicode - - @staticmethod - def find(value: int) -> typing.Optional[SccExtendedCharacter]: - """Find the special character corresponding to the specified value""" - for spec_char in list(SccExtendedCharacter): - if spec_char.contains_value(value): - return spec_char - return None diff --git a/src/main/python/ttconv/scc/line.py b/src/main/python/ttconv/scc/line.py index e9dcd56d..b9af4821 100644 --- a/src/main/python/ttconv/scc/line.py +++ b/src/main/python/ttconv/scc/line.py @@ -34,9 +34,10 @@ from ttconv.scc.caption_style import SccCaptionStyle from ttconv.scc.codes.attribute_codes import SccAttributeCode from ttconv.scc.codes.control_codes import SccControlCode +from ttconv.scc.codes.extended_characters import SccExtendedCharacter from ttconv.scc.codes.mid_row_codes import SccMidRowCode from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode -from ttconv.scc.codes.special_characters import SccSpecialCharacter, SccExtendedCharacter +from ttconv.scc.codes.special_characters import SccSpecialCharacter from ttconv.scc.context import SccContext from ttconv.scc.disassembly import get_color_disassembly, get_font_style_disassembly, get_text_decoration_disassembly from ttconv.scc.word import SccWord @@ -179,29 +180,22 @@ def process(self, context: SccContext) -> SmpteTimeCode: scc_code = scc_word.get_code() if isinstance(scc_code, SccPreambleAddressCode): - debug += "[PAC|" + str(scc_code.get_row()) + "|" + str(scc_code.get_indent()) - if scc_code.get_color() is not None: - debug += "|" + str(scc_code.get_color()) - if scc_code.get_font_style() is not None: - debug += "|I" - if scc_code.get_text_decoration() is not None: - debug += "|U" - debug += "/" + hex(scc_word.value) + "]" + debug += scc_code.debug(scc_word.value) context.process_preamble_address_code(scc_code, self.time_code) context.previous_word_type = type(scc_code) elif isinstance(scc_code, SccAttributeCode): - debug += "[ATC/" + hex(scc_word.value) + "]" + debug += scc_code.debug(scc_word.value) context.process_attribute_code(scc_code) context.previous_word_type = type(scc_code) elif isinstance(scc_code, SccMidRowCode): - debug += "[MRC|" + scc_code.get_name() + "/" + hex(scc_word.value) + "]" + debug += scc_code.debug(scc_word.value) context.process_mid_row_code(scc_code, self.time_code) context.previous_word_type = type(scc_code) elif isinstance(scc_code, SccControlCode): - debug += "[CC|" + scc_code.get_name() + "/" + hex(scc_word.value) + "]" + debug += scc_code.debug(scc_word.value) context.process_control_code(scc_code, self.time_code) context.previous_word_type = type(scc_code) diff --git a/src/main/python/ttconv/scc/word.py b/src/main/python/ttconv/scc/word.py index 84f70c06..e2caf28c 100644 --- a/src/main/python/ttconv/scc/word.py +++ b/src/main/python/ttconv/scc/word.py @@ -32,9 +32,10 @@ from ttconv.scc.codes import SccCode from ttconv.scc.codes.attribute_codes import SccAttributeCode from ttconv.scc.codes.control_codes import SccControlCode +from ttconv.scc.codes.extended_characters import SccExtendedCharacter from ttconv.scc.codes.mid_row_codes import SccMidRowCode from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode -from ttconv.scc.codes.special_characters import SccSpecialCharacter, SccExtendedCharacter +from ttconv.scc.codes.special_characters import SccSpecialCharacter from ttconv.scc.codes.standard_characters import SCC_STANDARD_CHARACTERS_MAPPING PARITY_BIT_MASK = 0b01111111 diff --git a/src/test/python/test_scc_extended_characters.py b/src/test/python/test_scc_extended_characters.py new file mode 100644 index 00000000..b6f52471 --- /dev/null +++ b/src/test/python/test_scc_extended_characters.py @@ -0,0 +1,557 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +# Copyright (c) 2020, Sandflow Consulting LLC +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""Unit tests for the SCC Extended characters""" + +# pylint: disable=R0201,C0115,C0116,W0212 + +import unittest + +from ttconv.scc.codes.extended_characters import SccExtendedCharacter + + +class SccExtendedCharactersTest(unittest.TestCase): + + def test_scc_spanish_extended_characters_unicode_values(self): + extended_char = SccExtendedCharacter.find(0x1220) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_ACUTE, extended_char) + self.assertEqual('\u00C1', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A20) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_ACUTE, extended_char) + self.assertEqual('\u00C1', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1221) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_ACUTE, extended_char) + self.assertEqual('\u00C9', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A21) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_ACUTE, extended_char) + self.assertEqual('\u00C9', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1222) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_ACUTE, extended_char) + self.assertEqual('\u00D3', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A22) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_ACUTE, extended_char) + self.assertEqual('\u00D3', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1223) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_ACUTE, extended_char) + self.assertEqual('\u00DA', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A23) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_ACUTE, extended_char) + self.assertEqual('\u00DA', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1224) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00DC', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A24) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00DC', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1225) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_U_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00FC', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A25) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_U_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00FC', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1226) + self.assertEqual(SccExtendedCharacter.LEFT_SINGLE_QUOTATION_MARK, extended_char) + self.assertEqual('\u2018', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A26) + self.assertEqual(SccExtendedCharacter.LEFT_SINGLE_QUOTATION_MARK, extended_char) + self.assertEqual('\u2018', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1227) + self.assertEqual(SccExtendedCharacter.INVERTED_EXCLAMATION_MARK, extended_char) + self.assertEqual('\u00A1', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A27) + self.assertEqual(SccExtendedCharacter.INVERTED_EXCLAMATION_MARK, extended_char) + self.assertEqual('\u00A1', extended_char.get_unicode_value()) + + def test_scc_miscellaneous_extended_characters_unicode_values(self): + extended_char = SccExtendedCharacter.find(0x1228) + self.assertEqual(SccExtendedCharacter.ASTERISK, extended_char) + self.assertEqual('\u002A', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A28) + self.assertEqual(SccExtendedCharacter.ASTERISK, extended_char) + self.assertEqual('\u002A', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1229) + self.assertEqual(SccExtendedCharacter.NEUTRAL_SINGLE_QUOTATION_MARK, extended_char) + self.assertEqual('\u0027', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A29) + self.assertEqual(SccExtendedCharacter.NEUTRAL_SINGLE_QUOTATION_MARK, extended_char) + self.assertEqual('\u0027', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x122A) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_HORIZONTAL, extended_char) + self.assertEqual('\u2501', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A2A) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_HORIZONTAL, extended_char) + self.assertEqual('\u2501', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x122B) + self.assertEqual(SccExtendedCharacter.COPYRIGHT_SIGN, extended_char) + self.assertEqual('\u00A9', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A2B) + self.assertEqual(SccExtendedCharacter.COPYRIGHT_SIGN, extended_char) + self.assertEqual('\u00A9', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x122C) + self.assertEqual(SccExtendedCharacter.SERVICE_MARK, extended_char) + self.assertEqual('\u2120', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A2C) + self.assertEqual(SccExtendedCharacter.SERVICE_MARK, extended_char) + self.assertEqual('\u2120', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x122D) + self.assertEqual(SccExtendedCharacter.BULLET, extended_char) + self.assertEqual('\u2022', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A2D) + self.assertEqual(SccExtendedCharacter.BULLET, extended_char) + self.assertEqual('\u2022', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x122E) + self.assertEqual(SccExtendedCharacter.LEFT_DOUBLE_QUOTATION_MARK, extended_char) + self.assertEqual('\u201C', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A2E) + self.assertEqual(SccExtendedCharacter.LEFT_DOUBLE_QUOTATION_MARK, extended_char) + self.assertEqual('\u201C', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x122F) + self.assertEqual(SccExtendedCharacter.RIGHT_DOUBLE_QUOTATION_MARK, extended_char) + self.assertEqual('\u201D', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A2F) + self.assertEqual(SccExtendedCharacter.RIGHT_DOUBLE_QUOTATION_MARK, extended_char) + self.assertEqual('\u201D', extended_char.get_unicode_value()) + + def test_scc_french_extended_characters_unicode_values(self): + extended_char = SccExtendedCharacter.find(0x1230) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_GRAVE, extended_char) + self.assertEqual('\u00C0', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A30) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_GRAVE, extended_char) + self.assertEqual('\u00C0', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1231) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX, extended_char) + self.assertEqual('\u00C2', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A31) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX, extended_char) + self.assertEqual('\u00C2', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1232) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_C_WITH_CEDILLA, extended_char) + self.assertEqual('\u00C7', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A32) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_C_WITH_CEDILLA, extended_char) + self.assertEqual('\u00C7', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1233) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_GRAVE, extended_char) + self.assertEqual('\u00C8', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A33) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_GRAVE, extended_char) + self.assertEqual('\u00C8', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1234) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX, extended_char) + self.assertEqual('\u00CA', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A34) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX, extended_char) + self.assertEqual('\u00CA', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1235) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00CB', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A35) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00CB', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1236) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_E_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00EB', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A36) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_E_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00EB', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1237) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX, extended_char) + self.assertEqual('\u00CE', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A37) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX, extended_char) + self.assertEqual('\u00CE', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1238) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00CF', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A38) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00CF', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1239) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_I_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00EF', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A39) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_I_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00EF', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x123A) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX, extended_char) + self.assertEqual('\u00D4', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A3A) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX, extended_char) + self.assertEqual('\u00D4', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x123B) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_GRAVE, extended_char) + self.assertEqual('\u00D9', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A3B) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_GRAVE, extended_char) + self.assertEqual('\u00D9', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x123C) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_U_WITH_GRAVE, extended_char) + self.assertEqual('\u00F9', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A3C) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_U_WITH_GRAVE, extended_char) + self.assertEqual('\u00F9', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x123D) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX, extended_char) + self.assertEqual('\u00DB', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A3D) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX, extended_char) + self.assertEqual('\u00DB', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x123E) + self.assertEqual(SccExtendedCharacter.LEFT_POINTING_GUILLEMET, extended_char) + self.assertEqual('\u00AB', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A3E) + self.assertEqual(SccExtendedCharacter.LEFT_POINTING_GUILLEMET, extended_char) + self.assertEqual('\u00AB', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x123F) + self.assertEqual(SccExtendedCharacter.RIGHT_POINTING_GUILLEMET, extended_char) + self.assertEqual('\u00BB', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1A3F) + self.assertEqual(SccExtendedCharacter.RIGHT_POINTING_GUILLEMET, extended_char) + self.assertEqual('\u00BB', extended_char.get_unicode_value()) + + def test_scc_portuguese_extended_characters_unicode_values(self): + extended_char = SccExtendedCharacter.find(0x1320) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_TILDE, extended_char) + self.assertEqual('\u00C3', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B20) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_TILDE, extended_char) + self.assertEqual('\u00C3', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1321) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_TILDE, extended_char) + self.assertEqual('\u00E3', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B21) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_TILDE, extended_char) + self.assertEqual('\u00E3', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1322) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_ACUTE, extended_char) + self.assertEqual('\u00CD', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B22) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_ACUTE, extended_char) + self.assertEqual('\u00CD', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1323) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_GRAVE, extended_char) + self.assertEqual('\u00CC', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B23) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_GRAVE, extended_char) + self.assertEqual('\u00CC', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1324) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_I_WITH_GRAVE, extended_char) + self.assertEqual('\u00EC', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B24) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_I_WITH_GRAVE, extended_char) + self.assertEqual('\u00EC', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1325) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_GRAVE, extended_char) + self.assertEqual('\u00D2', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B25) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_GRAVE, extended_char) + self.assertEqual('\u00D2', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1326) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_GRAVE, extended_char) + self.assertEqual('\u00F2', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B26) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_GRAVE, extended_char) + self.assertEqual('\u00F2', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1327) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_TILDE, extended_char) + self.assertEqual('\u00D5', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B27) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_TILDE, extended_char) + self.assertEqual('\u00D5', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1328) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_TILDE, extended_char) + self.assertEqual('\u00F5', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B28) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_TILDE, extended_char) + self.assertEqual('\u00F5', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1329) + self.assertEqual(SccExtendedCharacter.BRACE_OPENING, extended_char) + self.assertEqual('\u007B', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B29) + self.assertEqual(SccExtendedCharacter.BRACE_OPENING, extended_char) + self.assertEqual('\u007B', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x132A) + self.assertEqual(SccExtendedCharacter.BRACE_CLOSING, extended_char) + self.assertEqual('\u007D', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B2A) + self.assertEqual(SccExtendedCharacter.BRACE_CLOSING, extended_char) + self.assertEqual('\u007D', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x132B) + self.assertEqual(SccExtendedCharacter.REVERSE_SOLIDUS, extended_char) + self.assertEqual('\u005C', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B2B) + self.assertEqual(SccExtendedCharacter.REVERSE_SOLIDUS, extended_char) + self.assertEqual('\u005C', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x132C) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_TURNED_V, extended_char) + self.assertEqual('\u028C', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B2C) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_TURNED_V, extended_char) + self.assertEqual('\u028C', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x132D) + self.assertEqual(SccExtendedCharacter.LOW_LINE, extended_char) + self.assertEqual('\u005F', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B2D) + self.assertEqual(SccExtendedCharacter.LOW_LINE, extended_char) + self.assertEqual('\u005F', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x132E) + self.assertEqual(SccExtendedCharacter.VERTICAL_LINE, extended_char) + self.assertEqual('\u007C', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B2E) + self.assertEqual(SccExtendedCharacter.VERTICAL_LINE, extended_char) + self.assertEqual('\u007C', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x132F) + self.assertEqual(SccExtendedCharacter.TILDE, extended_char) + self.assertEqual('\u007E', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B2F) + self.assertEqual(SccExtendedCharacter.TILDE, extended_char) + self.assertEqual('\u007E', extended_char.get_unicode_value()) + + def test_scc_german_extended_characters_unicode_values(self): + extended_char = SccExtendedCharacter.find(0x1330) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00C4', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B30) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00C4', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1331) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00E4', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B31) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00E4', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1332) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00D6', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B32) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00D6', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1333) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00F6', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B33) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_DIAERESIS, extended_char) + self.assertEqual('\u00F6', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1334) + self.assertEqual(SccExtendedCharacter.ESZETT, extended_char) + self.assertEqual('\u00DF', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B34) + self.assertEqual(SccExtendedCharacter.ESZETT, extended_char) + self.assertEqual('\u00DF', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1335) + self.assertEqual(SccExtendedCharacter.YEN_SIGN, extended_char) + self.assertEqual('\u00A5', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B35) + self.assertEqual(SccExtendedCharacter.YEN_SIGN, extended_char) + self.assertEqual('\u00A5', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1336) + self.assertEqual(SccExtendedCharacter.CURRENCY_SIGN, extended_char) + self.assertEqual('\u00A4', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B36) + self.assertEqual(SccExtendedCharacter.CURRENCY_SIGN, extended_char) + self.assertEqual('\u00A4', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1337) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_VERTICAL, extended_char) + self.assertEqual('\u2503', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B37) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_VERTICAL, extended_char) + self.assertEqual('\u2503', extended_char.get_unicode_value()) + + def test_scc_danish_extended_characters_unicode_values(self): + extended_char = SccExtendedCharacter.find(0x1338) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE, extended_char) + self.assertEqual('\u00C5', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B38) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE, extended_char) + self.assertEqual('\u00C5', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1339) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_RING_ABOVE, extended_char) + self.assertEqual('\u00E5', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B39) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_RING_ABOVE, extended_char) + self.assertEqual('\u00E5', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x133A) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_STROKE, extended_char) + self.assertEqual('\u00D8', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B3A) + self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_STROKE, extended_char) + self.assertEqual('\u00D8', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x133B) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_STROKE, extended_char) + self.assertEqual('\u00F8', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B3B) + self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_STROKE, extended_char) + self.assertEqual('\u00F8', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x133C) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_DOWN_AND_RIGHT, extended_char) + self.assertEqual('\u250F', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B3C) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_DOWN_AND_RIGHT, extended_char) + self.assertEqual('\u250F', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x133D) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_DOWN_AND_LEFT, extended_char) + self.assertEqual('\u2513', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B3D) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_DOWN_AND_LEFT, extended_char) + self.assertEqual('\u2513', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x133E) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_UP_AND_RIGHT, extended_char) + self.assertEqual('\u2517', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B3E) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_UP_AND_RIGHT, extended_char) + self.assertEqual('\u2517', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x133F) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_UP_AND_LEFT, extended_char) + self.assertEqual('\u251B', extended_char.get_unicode_value()) + + extended_char = SccExtendedCharacter.find(0x1B3F) + self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_UP_AND_LEFT, extended_char) + self.assertEqual('\u251B', extended_char.get_unicode_value()) + + +if __name__ == '__main__': + unittest.main() diff --git a/src/test/python/test_scc_special_characters.py b/src/test/python/test_scc_special_characters.py index 727baa51..8c67bb9b 100644 --- a/src/test/python/test_scc_special_characters.py +++ b/src/test/python/test_scc_special_characters.py @@ -23,16 +23,17 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -"""Unit tests for the SCC Special and Extended characters""" +"""Unit tests for the SCC Special characters""" # pylint: disable=R0201,C0115,C0116,W0212 import unittest -from ttconv.scc.codes.special_characters import SccSpecialCharacter, SccExtendedCharacter +from ttconv.scc.codes.extended_characters import SccExtendedCharacter +from ttconv.scc.codes.special_characters import SccSpecialCharacter -class SccExtendedCharacterTest(unittest.TestCase): +class SccSpecialCharactersTest(unittest.TestCase): def test_scc_special_character_values(self): special_char_codes = list(range(0x1130, 0x1140)) + list(range(0x1930, 0x1940)) @@ -175,524 +176,6 @@ def test_scc_special_characters_unicode_values(self): self.assertEqual(SccSpecialCharacter.LOWER_CASE_U_WITH_CIRCUMFLEX, spec_char) self.assertEqual('\u00FB', spec_char.get_unicode_value()) - def test_scc_spanish_extended_characters_unicode_values(self): - extended_char = SccExtendedCharacter.find(0x1220) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_ACUTE, extended_char) - self.assertEqual('\u00C1', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A20) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_ACUTE, extended_char) - self.assertEqual('\u00C1', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1221) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_ACUTE, extended_char) - self.assertEqual('\u00C9', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A21) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_ACUTE, extended_char) - self.assertEqual('\u00C9', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1222) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_ACUTE, extended_char) - self.assertEqual('\u00D3', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A22) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_ACUTE, extended_char) - self.assertEqual('\u00D3', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1223) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_ACUTE, extended_char) - self.assertEqual('\u00DA', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A23) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_ACUTE, extended_char) - self.assertEqual('\u00DA', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1224) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00DC', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A24) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00DC', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1225) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_U_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00FC', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A25) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_U_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00FC', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1226) - self.assertEqual(SccExtendedCharacter.LEFT_SINGLE_QUOTATION_MARK, extended_char) - self.assertEqual('\u2018', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A26) - self.assertEqual(SccExtendedCharacter.LEFT_SINGLE_QUOTATION_MARK, extended_char) - self.assertEqual('\u2018', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1227) - self.assertEqual(SccExtendedCharacter.INVERTED_EXCLAMATION_MARK, extended_char) - self.assertEqual('\u00A1', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A27) - self.assertEqual(SccExtendedCharacter.INVERTED_EXCLAMATION_MARK, extended_char) - self.assertEqual('\u00A1', extended_char.get_unicode_value()) - - def test_scc_miscellaneous_extended_characters_unicode_values(self): - extended_char = SccExtendedCharacter.find(0x1228) - self.assertEqual(SccExtendedCharacter.ASTERISK, extended_char) - self.assertEqual('\u002A', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A28) - self.assertEqual(SccExtendedCharacter.ASTERISK, extended_char) - self.assertEqual('\u002A', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1229) - self.assertEqual(SccExtendedCharacter.NEUTRAL_SINGLE_QUOTATION_MARK, extended_char) - self.assertEqual('\u0027', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A29) - self.assertEqual(SccExtendedCharacter.NEUTRAL_SINGLE_QUOTATION_MARK, extended_char) - self.assertEqual('\u0027', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x122A) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_HORIZONTAL, extended_char) - self.assertEqual('\u2501', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A2A) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_HORIZONTAL, extended_char) - self.assertEqual('\u2501', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x122B) - self.assertEqual(SccExtendedCharacter.COPYRIGHT_SIGN, extended_char) - self.assertEqual('\u00A9', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A2B) - self.assertEqual(SccExtendedCharacter.COPYRIGHT_SIGN, extended_char) - self.assertEqual('\u00A9', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x122C) - self.assertEqual(SccExtendedCharacter.SERVICE_MARK, extended_char) - self.assertEqual('\u2120', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A2C) - self.assertEqual(SccExtendedCharacter.SERVICE_MARK, extended_char) - self.assertEqual('\u2120', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x122D) - self.assertEqual(SccExtendedCharacter.BULLET, extended_char) - self.assertEqual('\u2022', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A2D) - self.assertEqual(SccExtendedCharacter.BULLET, extended_char) - self.assertEqual('\u2022', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x122E) - self.assertEqual(SccExtendedCharacter.LEFT_DOUBLE_QUOTATION_MARK, extended_char) - self.assertEqual('\u201C', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A2E) - self.assertEqual(SccExtendedCharacter.LEFT_DOUBLE_QUOTATION_MARK, extended_char) - self.assertEqual('\u201C', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x122F) - self.assertEqual(SccExtendedCharacter.RIGHT_DOUBLE_QUOTATION_MARK, extended_char) - self.assertEqual('\u201D', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A2F) - self.assertEqual(SccExtendedCharacter.RIGHT_DOUBLE_QUOTATION_MARK, extended_char) - self.assertEqual('\u201D', extended_char.get_unicode_value()) - - def test_scc_french_extended_characters_unicode_values(self): - extended_char = SccExtendedCharacter.find(0x1230) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_GRAVE, extended_char) - self.assertEqual('\u00C0', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A30) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_GRAVE, extended_char) - self.assertEqual('\u00C0', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1231) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX, extended_char) - self.assertEqual('\u00C2', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A31) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX, extended_char) - self.assertEqual('\u00C2', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1232) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_C_WITH_CEDILLA, extended_char) - self.assertEqual('\u00C7', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A32) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_C_WITH_CEDILLA, extended_char) - self.assertEqual('\u00C7', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1233) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_GRAVE, extended_char) - self.assertEqual('\u00C8', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A33) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_GRAVE, extended_char) - self.assertEqual('\u00C8', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1234) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX, extended_char) - self.assertEqual('\u00CA', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A34) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX, extended_char) - self.assertEqual('\u00CA', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1235) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00CB', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A35) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00CB', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1236) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_E_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00EB', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A36) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_E_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00EB', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1237) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX, extended_char) - self.assertEqual('\u00CE', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A37) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX, extended_char) - self.assertEqual('\u00CE', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1238) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00CF', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A38) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00CF', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1239) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_I_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00EF', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A39) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_I_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00EF', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x123A) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX, extended_char) - self.assertEqual('\u00D4', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A3A) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX, extended_char) - self.assertEqual('\u00D4', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x123B) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_GRAVE, extended_char) - self.assertEqual('\u00D9', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A3B) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_GRAVE, extended_char) - self.assertEqual('\u00D9', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x123C) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_U_WITH_GRAVE, extended_char) - self.assertEqual('\u00F9', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A3C) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_U_WITH_GRAVE, extended_char) - self.assertEqual('\u00F9', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x123D) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX, extended_char) - self.assertEqual('\u00DB', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A3D) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX, extended_char) - self.assertEqual('\u00DB', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x123E) - self.assertEqual(SccExtendedCharacter.LEFT_POINTING_GUILLEMET, extended_char) - self.assertEqual('\u00AB', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A3E) - self.assertEqual(SccExtendedCharacter.LEFT_POINTING_GUILLEMET, extended_char) - self.assertEqual('\u00AB', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x123F) - self.assertEqual(SccExtendedCharacter.RIGHT_POINTING_GUILLEMET, extended_char) - self.assertEqual('\u00BB', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1A3F) - self.assertEqual(SccExtendedCharacter.RIGHT_POINTING_GUILLEMET, extended_char) - self.assertEqual('\u00BB', extended_char.get_unicode_value()) - - def test_scc_portuguese_extended_characters_unicode_values(self): - extended_char = SccExtendedCharacter.find(0x1320) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_TILDE, extended_char) - self.assertEqual('\u00C3', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B20) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_TILDE, extended_char) - self.assertEqual('\u00C3', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1321) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_TILDE, extended_char) - self.assertEqual('\u00E3', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B21) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_TILDE, extended_char) - self.assertEqual('\u00E3', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1322) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_ACUTE, extended_char) - self.assertEqual('\u00CD', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B22) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_ACUTE, extended_char) - self.assertEqual('\u00CD', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1323) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_GRAVE, extended_char) - self.assertEqual('\u00CC', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B23) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_I_WITH_GRAVE, extended_char) - self.assertEqual('\u00CC', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1324) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_I_WITH_GRAVE, extended_char) - self.assertEqual('\u00EC', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B24) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_I_WITH_GRAVE, extended_char) - self.assertEqual('\u00EC', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1325) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_GRAVE, extended_char) - self.assertEqual('\u00D2', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B25) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_GRAVE, extended_char) - self.assertEqual('\u00D2', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1326) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_GRAVE, extended_char) - self.assertEqual('\u00F2', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B26) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_GRAVE, extended_char) - self.assertEqual('\u00F2', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1327) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_TILDE, extended_char) - self.assertEqual('\u00D5', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B27) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_TILDE, extended_char) - self.assertEqual('\u00D5', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1328) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_TILDE, extended_char) - self.assertEqual('\u00F5', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B28) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_TILDE, extended_char) - self.assertEqual('\u00F5', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1329) - self.assertEqual(SccExtendedCharacter.BRACE_OPENING, extended_char) - self.assertEqual('\u007B', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B29) - self.assertEqual(SccExtendedCharacter.BRACE_OPENING, extended_char) - self.assertEqual('\u007B', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x132A) - self.assertEqual(SccExtendedCharacter.BRACE_CLOSING, extended_char) - self.assertEqual('\u007D', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B2A) - self.assertEqual(SccExtendedCharacter.BRACE_CLOSING, extended_char) - self.assertEqual('\u007D', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x132B) - self.assertEqual(SccExtendedCharacter.REVERSE_SOLIDUS, extended_char) - self.assertEqual('\u005C', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B2B) - self.assertEqual(SccExtendedCharacter.REVERSE_SOLIDUS, extended_char) - self.assertEqual('\u005C', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x132C) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_TURNED_V, extended_char) - self.assertEqual('\u028C', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B2C) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_TURNED_V, extended_char) - self.assertEqual('\u028C', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x132D) - self.assertEqual(SccExtendedCharacter.LOW_LINE, extended_char) - self.assertEqual('\u005F', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B2D) - self.assertEqual(SccExtendedCharacter.LOW_LINE, extended_char) - self.assertEqual('\u005F', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x132E) - self.assertEqual(SccExtendedCharacter.VERTICAL_LINE, extended_char) - self.assertEqual('\u007C', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B2E) - self.assertEqual(SccExtendedCharacter.VERTICAL_LINE, extended_char) - self.assertEqual('\u007C', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x132F) - self.assertEqual(SccExtendedCharacter.TILDE, extended_char) - self.assertEqual('\u007E', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B2F) - self.assertEqual(SccExtendedCharacter.TILDE, extended_char) - self.assertEqual('\u007E', extended_char.get_unicode_value()) - - def test_scc_german_extended_characters_unicode_values(self): - extended_char = SccExtendedCharacter.find(0x1330) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00C4', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B30) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00C4', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1331) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00E4', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B31) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00E4', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1332) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00D6', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B32) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00D6', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1333) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00F6', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B33) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_DIAERESIS, extended_char) - self.assertEqual('\u00F6', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1334) - self.assertEqual(SccExtendedCharacter.ESZETT, extended_char) - self.assertEqual('\u00DF', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B34) - self.assertEqual(SccExtendedCharacter.ESZETT, extended_char) - self.assertEqual('\u00DF', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1335) - self.assertEqual(SccExtendedCharacter.YEN_SIGN, extended_char) - self.assertEqual('\u00A5', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B35) - self.assertEqual(SccExtendedCharacter.YEN_SIGN, extended_char) - self.assertEqual('\u00A5', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1336) - self.assertEqual(SccExtendedCharacter.CURRENCY_SIGN, extended_char) - self.assertEqual('\u00A4', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B36) - self.assertEqual(SccExtendedCharacter.CURRENCY_SIGN, extended_char) - self.assertEqual('\u00A4', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1337) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_VERTICAL, extended_char) - self.assertEqual('\u2503', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B37) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_VERTICAL, extended_char) - self.assertEqual('\u2503', extended_char.get_unicode_value()) - - def test_scc_danish_extended_characters_unicode_values(self): - extended_char = SccExtendedCharacter.find(0x1338) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE, extended_char) - self.assertEqual('\u00C5', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B38) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE, extended_char) - self.assertEqual('\u00C5', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1339) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_RING_ABOVE, extended_char) - self.assertEqual('\u00E5', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B39) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_A_WITH_RING_ABOVE, extended_char) - self.assertEqual('\u00E5', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x133A) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_STROKE, extended_char) - self.assertEqual('\u00D8', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B3A) - self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_O_WITH_STROKE, extended_char) - self.assertEqual('\u00D8', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x133B) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_STROKE, extended_char) - self.assertEqual('\u00F8', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B3B) - self.assertEqual(SccExtendedCharacter.LATIN_SMALL_LETTER_O_WITH_STROKE, extended_char) - self.assertEqual('\u00F8', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x133C) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_DOWN_AND_RIGHT, extended_char) - self.assertEqual('\u250F', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B3C) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_DOWN_AND_RIGHT, extended_char) - self.assertEqual('\u250F', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x133D) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_DOWN_AND_LEFT, extended_char) - self.assertEqual('\u2513', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B3D) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_DOWN_AND_LEFT, extended_char) - self.assertEqual('\u2513', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x133E) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_UP_AND_RIGHT, extended_char) - self.assertEqual('\u2517', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B3E) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_UP_AND_RIGHT, extended_char) - self.assertEqual('\u2517', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x133F) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_UP_AND_LEFT, extended_char) - self.assertEqual('\u251B', extended_char.get_unicode_value()) - - extended_char = SccExtendedCharacter.find(0x1B3F) - self.assertEqual(SccExtendedCharacter.BOX_DRAWINGS_HEAVY_UP_AND_LEFT, extended_char) - self.assertEqual('\u251B', extended_char.get_unicode_value()) - if __name__ == '__main__': unittest.main() diff --git a/src/test/python/test_scc_word.py b/src/test/python/test_scc_word.py index 4931b1b3..682d96f2 100644 --- a/src/test/python/test_scc_word.py +++ b/src/test/python/test_scc_word.py @@ -30,9 +30,9 @@ import unittest from ttconv.scc.codes.control_codes import SccControlCode +from ttconv.scc.codes.extended_characters import SccExtendedCharacter from ttconv.scc.codes.mid_row_codes import SccMidRowCode from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode -from ttconv.scc.codes.special_characters import SccExtendedCharacter from ttconv.scc.word import SccWord @@ -110,29 +110,29 @@ def test_scc_word_get_code(self): self.assertEqual(SccControlCode.RCL, SccWord.from_str("9420").get_code()) self.assertEqual(SccMidRowCode.ITALICS, SccWord.from_str("91ae").get_code()) self.assertEqual(SccControlCode.BS, SccWord.from_str("9421").get_code()) - self.assertEqual(None, SccWord.from_str("4c6f").get_code()) # "Lo" - self.assertEqual(None, SccWord.from_str("7265").get_code()) # "re" - self.assertEqual(None, SccWord.from_str("6d20").get_code()) # "m " - self.assertEqual(None, SccWord.from_str("6970").get_code()) # "ip" - self.assertEqual(None, SccWord.from_str("7375").get_code()) # "su" - self.assertEqual(None, SccWord.from_str("6d20").get_code()) # "m " + self.assertEqual(None, SccWord.from_str("4c6f").get_code()) # "Lo" + self.assertEqual(None, SccWord.from_str("7265").get_code()) # "re" + self.assertEqual(None, SccWord.from_str("6d20").get_code()) # "m " + self.assertEqual(None, SccWord.from_str("6970").get_code()) # "ip" + self.assertEqual(None, SccWord.from_str("7375").get_code()) # "su" + self.assertEqual(None, SccWord.from_str("6d20").get_code()) # "m " self.assertEqual(SccExtendedCharacter.LATIN_CAPITAL_LETTER_A_WITH_ACUTE, SccWord.from_str("9220").get_code()) self.assertEqual(SccControlCode.EDM, SccWord.from_str("942c").get_code()) self.assertEqual(SccControlCode.EOC, SccWord.from_str("942f").get_code()) self.assertEqual(SccControlCode.RU2, SccWord.from_str("9425").get_code()) self.assertEqual(SccControlCode.CR, SccWord.from_str("94ad").get_code()) self.assertEqual(SccPreambleAddressCode, SccWord.from_str("9673").get_code().__class__) - self.assertEqual(None, SccWord.from_str("636f").get_code()) # "co" - self.assertEqual(None, SccWord.from_str("6e73").get_code()) # "ns" - self.assertEqual(None, SccWord.from_str("6563").get_code()) # "ec" - self.assertEqual(None, SccWord.from_str("7465").get_code()) # "te" - self.assertEqual(None, SccWord.from_str("7475").get_code()) # "tu" - self.assertEqual(None, SccWord.from_str("7220").get_code()) # "r " - self.assertEqual(None, SccWord.from_str("6164").get_code()) # "ad" - self.assertEqual(None, SccWord.from_str("6970").get_code()) # "ip" - self.assertEqual(None, SccWord.from_str("6973").get_code()) # "is" - self.assertEqual(None, SccWord.from_str("6369").get_code()) # "ci" - self.assertEqual(None, SccWord.from_str("6e67").get_code()) # "ng" - self.assertEqual(None, SccWord.from_str("2065").get_code()) # " e" - self.assertEqual(None, SccWord.from_str("6c69").get_code()) # "li" - self.assertEqual(None, SccWord.from_str("742e").get_code()) # "t." + self.assertEqual(None, SccWord.from_str("636f").get_code()) # "co" + self.assertEqual(None, SccWord.from_str("6e73").get_code()) # "ns" + self.assertEqual(None, SccWord.from_str("6563").get_code()) # "ec" + self.assertEqual(None, SccWord.from_str("7465").get_code()) # "te" + self.assertEqual(None, SccWord.from_str("7475").get_code()) # "tu" + self.assertEqual(None, SccWord.from_str("7220").get_code()) # "r " + self.assertEqual(None, SccWord.from_str("6164").get_code()) # "ad" + self.assertEqual(None, SccWord.from_str("6970").get_code()) # "ip" + self.assertEqual(None, SccWord.from_str("6973").get_code()) # "is" + self.assertEqual(None, SccWord.from_str("6369").get_code()) # "ci" + self.assertEqual(None, SccWord.from_str("6e67").get_code()) # "ng" + self.assertEqual(None, SccWord.from_str("2065").get_code()) # " e" + self.assertEqual(None, SccWord.from_str("6c69").get_code()) # "li" + self.assertEqual(None, SccWord.from_str("742e").get_code()) # "t." From 616b99c789b8581890fac6c6ec5ef44b0cb48d44 Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Thu, 10 Aug 2023 17:20:39 +0200 Subject: [PATCH 07/13] SCC: fix backspace operation --- src/main/python/ttconv/scc/caption_line.py | 2 +- src/main/python/ttconv/scc/caption_text.py | 1 + src/main/python/ttconv/scc/context.py | 43 ++++++++++++---------- src/main/python/ttconv/scc/line.py | 5 +-- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/main/python/ttconv/scc/caption_line.py b/src/main/python/ttconv/scc/caption_line.py index b26cf27c..10b40aa8 100644 --- a/src/main/python/ttconv/scc/caption_line.py +++ b/src/main/python/ttconv/scc/caption_line.py @@ -67,7 +67,7 @@ def add_text(self, text: Union[SccCaptionText, str]): # While the cursor is not on the last text element, and some text remains while self._current_text is not self._texts[-1] and len(remaining_text) > 0: - available = self._current_text.get_length() - self.get_current_text().get_cursor() + available = self._current_text.get_length() - self._current_text.get_cursor() text_to_write = remaining_text[:available] # Replace current text element content diff --git a/src/main/python/ttconv/scc/caption_text.py b/src/main/python/ttconv/scc/caption_text.py index cc31da4a..886f1430 100644 --- a/src/main/python/ttconv/scc/caption_text.py +++ b/src/main/python/ttconv/scc/caption_text.py @@ -95,6 +95,7 @@ def get_cursor(self) -> int: def backspace(self): """Remove last character""" self._text = self._text[:-1] + self._cursor = max(self._cursor - 1, 0) def get_style_properties(self) -> dict: """Sets the style properties map""" diff --git a/src/main/python/ttconv/scc/context.py b/src/main/python/ttconv/scc/context.py index f2cc8327..959974e7 100644 --- a/src/main/python/ttconv/scc/context.py +++ b/src/main/python/ttconv/scc/context.py @@ -88,6 +88,16 @@ def set_safe_area(self, safe_area_x_offset: int, safe_area_y_offset: int): self.safe_area_x_offset = safe_area_x_offset self.safe_area_y_offset = safe_area_y_offset + def get_caption_to_process(self): + """Returns the caption currently being processed""" + if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): + # If the Paint-On or Roll-Up style is activated, write directly on active caption + return self.active_caption + if self.current_style is SccCaptionStyle.PopOn: + # For Pop-On style, write first on a buffered caption + return self.buffered_caption + raise ValueError("SCC caption style not defined") + def has_active_caption(self) -> bool: """Returns whether captions are being displayed or not""" return self.active_caption is not None @@ -149,6 +159,12 @@ def push_active_caption_to_model(self, time_code: SmpteTimeCode, clear_active_ca self.div.push_child(previous_caption.to_paragraph(self.div.get_doc())) + def backspace(self): + """Move the cursors in a column to the left""" + self.get_caption_to_process().get_current_text().backspace() + (row, indent) = self.get_caption_to_process().get_cursor() + self.get_caption_to_process().set_cursor_at(row, max(indent - 1, 0)) + def paint_on_active_caption(self, time_code: SmpteTimeCode): """Initialize active caption for paint-on style""" active_style = SccCaptionStyle.PaintOn @@ -234,10 +250,7 @@ def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code: def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTimeCode): """Processes SCC Mid-Row Code to map it to the model""" - # If the Paint-On or Roll-Up style is activated, write directly on active caption - processed_caption = self.buffered_caption - if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): - processed_caption = self.active_caption + processed_caption = self.get_caption_to_process() if processed_caption is None: raise ValueError("No current SCC caption initialized") @@ -290,10 +303,7 @@ def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTime def process_attribute_code(self, attribute_code: SccAttributeCode): """Processes SCC Attribute Code to map it to the model""" - # If the Paint-On or Roll-Up style is activated, write directly on active caption - processed_caption = self.buffered_caption - if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): - processed_caption = self.active_caption + processed_caption = self.get_caption_to_process() if processed_caption is None or processed_caption.get_current_text() is None: raise ValueError("No current SCC caption nor content initialized") @@ -312,8 +322,6 @@ def process_attribute_code(self, attribute_code: SccAttributeCode): def process_control_code(self, control_code: SccControlCode, time_code: SmpteTimeCode): """Processes SCC Control Code to map it to the model""" - processed_caption = self.buffered_caption - if control_code is SccControlCode.RCL: # Start a new Pop-On caption self.current_style = SccCaptionStyle.PopOn @@ -335,12 +343,7 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim elif control_code is SccControlCode.RU4: self.roll_up_depth = 4 - else: - # If the Paint-On or Roll-Up style is activated, write directly on active caption - if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): - processed_caption = self.active_caption - - if control_code is SccControlCode.EOC: + elif control_code is SccControlCode.EOC: # Display caption (Pop-On) self.set_buffered_caption_begin_time(time_code) self.flip_buffered_to_active_captions(time_code) @@ -372,13 +375,13 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim self.buffered_caption = None elif control_code is SccControlCode.TO1: - processed_caption.indent_cursor(1) + self.get_caption_to_process().indent_cursor(1) elif control_code is SccControlCode.TO2: - processed_caption.indent_cursor(2) + self.get_caption_to_process().indent_cursor(2) elif control_code is SccControlCode.TO3: - processed_caption.indent_cursor(3) + self.get_caption_to_process().indent_cursor(3) elif control_code is SccControlCode.CR: # Roll the displayed caption up one row (Roll-Up) @@ -410,7 +413,7 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim # Backspace # When a Backspace is received, the cursor moves to the left one column position erasing # the character or Mid-Row Code occupying that location, unless the cursor is in Column 1 - processed_caption.get_current_text().backspace() + self.backspace() def process_text(self, word: str, time_code: SmpteTimeCode): """Processes SCC text words""" diff --git a/src/main/python/ttconv/scc/line.py b/src/main/python/ttconv/scc/line.py index b9af4821..0fc6f933 100644 --- a/src/main/python/ttconv/scc/line.py +++ b/src/main/python/ttconv/scc/line.py @@ -206,10 +206,7 @@ def process(self, context: SccContext) -> SmpteTimeCode: context.previous_word_type = type(scc_code) elif isinstance(scc_code, SccExtendedCharacter): - if context.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): - context.active_caption.get_current_text().backspace() - else: - context.buffered_caption.get_current_text().backspace() + context.backspace() word = scc_code.get_unicode_value() debug += word From 1aab46a292bae3cf37fd2f1b0f98fe22a075f3b5 Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Fri, 11 Aug 2023 16:53:30 +0200 Subject: [PATCH 08/13] SCC: reader handles captions without Preamble Address Code Related to https://github.com/sandflow/ttconv/pull/385 --- .../python/ttconv/scc/caption_paragraph.py | 10 +- src/main/python/ttconv/scc/context.py | 108 +++++---- src/main/python/ttconv/scc/reader.py | 7 +- src/test/python/test_scc_reader.py | 206 +++++++++++++++++- 4 files changed, 275 insertions(+), 56 deletions(-) diff --git a/src/main/python/ttconv/scc/caption_paragraph.py b/src/main/python/ttconv/scc/caption_paragraph.py index ecef20a3..9452296a 100644 --- a/src/main/python/ttconv/scc/caption_paragraph.py +++ b/src/main/python/ttconv/scc/caption_paragraph.py @@ -33,8 +33,8 @@ from ttconv.model import Region, ContentDocument, P, Br, Span, Text from ttconv.scc.caption_line import SccCaptionLine -from ttconv.scc.caption_text import SccCaptionText from ttconv.scc.caption_style import SccCaptionStyle +from ttconv.scc.caption_text import SccCaptionText from ttconv.scc.utils import get_position_from_offsets, get_extent_from_dimensions, convert_cells_to_percentages from ttconv.style_properties import CoordinateType, ExtentType, StyleProperties, LengthType, DisplayAlignType, ShowBackgroundType, \ TextAlignType, NamedColors @@ -86,7 +86,7 @@ def set_begin(self, time_code): """Sets caption begin time code""" self._begin = copy.copy(time_code) - def get_begin(self) -> SmpteTimeCode: + def get_begin(self) -> Optional[SmpteTimeCode]: """Returns the caption begin time code""" return self._begin @@ -94,7 +94,7 @@ def set_end(self, time_code): """Sets caption end time code""" self._end = copy.copy(time_code) - def get_end(self) -> SmpteTimeCode: + def get_end(self) -> Optional[SmpteTimeCode]: """Returns the caption end time code""" return self._end @@ -106,6 +106,10 @@ def get_safe_area_y_offset(self): """Returns the safe area y offset""" return self._safe_area_y_offset + def set_caption_style(self, caption_style: SccCaptionStyle): + """Sets the caption style""" + self._caption_style = caption_style + def get_caption_style(self) -> SccCaptionStyle: """Returns the caption style""" return self._caption_style diff --git a/src/main/python/ttconv/scc/context.py b/src/main/python/ttconv/scc/context.py index 959974e7..c46d10ed 100644 --- a/src/main/python/ttconv/scc/context.py +++ b/src/main/python/ttconv/scc/context.py @@ -48,7 +48,7 @@ class SccContext: """SCC context for reader""" - def __init__(self, config: Optional[SccReaderConfiguration] = None): + def __init__(self, safe_area_x_offset: int, safe_area_y_offset: int, config: Optional[SccReaderConfiguration] = None): # Caption paragraphs container self.div: Optional[Div] = None @@ -56,19 +56,19 @@ def __init__(self, config: Optional[SccReaderConfiguration] = None): self.count: int = 0 # Screen safe area offsets - self.safe_area_x_offset: int = 0 - self.safe_area_y_offset: int = 0 + self.safe_area_x_offset = safe_area_x_offset + self.safe_area_y_offset = safe_area_y_offset # Previously read SCC word value self.previous_word: Optional[SccWord] = None self.previous_word_type: Optional[Type] = None # Buffered caption being built - self.buffered_caption: Optional[SccCaptionParagraph] = None + self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) # Captions being displayed - self.active_caption: Optional[SccCaptionParagraph] = None + self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) # Caption style (Pop-on, Roll-up, Paint-on) currently processed - self.current_style: Optional[SccCaptionStyle] = None + self.current_style = SccCaptionStyle.Unknown # Roll-up caption number of lines self.roll_up_depth: int = 0 @@ -83,10 +83,13 @@ def __init__(self, config: Optional[SccReaderConfiguration] = None): # Text alignment self.text_alignment = TextAlignment.AUTO if config is None else config.text_align - def set_safe_area(self, safe_area_x_offset: int, safe_area_y_offset: int): - """Sets the safe area""" - self.safe_area_x_offset = safe_area_x_offset - self.safe_area_y_offset = safe_area_y_offset + def reset_active_caption(self): + """Resets caption being displayed""" + self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) + + def reset_buffered_caption(self): + """Resets buffered caption""" + self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) def get_caption_to_process(self): """Returns the caption currently being processed""" @@ -100,31 +103,29 @@ def get_caption_to_process(self): def has_active_caption(self) -> bool: """Returns whether captions are being displayed or not""" - return self.active_caption is not None + return self.active_caption.get_begin() is not None def set_buffered_caption_begin_time(self, time_code: SmpteTimeCode): """Initializes the current buffered caption with begin time""" - if self.buffered_caption is not None: - self.buffered_caption.set_begin(time_code) + self.buffered_caption.set_begin(time_code) def initialize_active_caption(self, begin_time_code: SmpteTimeCode): """Initializes the current active caption with id and begin time""" - if self.active_caption is not None: - if not self.active_caption.get_id(): - self.count += 1 - self.active_caption.set_id("caption" + str(self.count)) + if not self.active_caption.get_id(): + self.count += 1 + self.active_caption.set_id("caption" + str(self.count)) - self.active_caption.set_begin(begin_time_code) + self.active_caption.set_begin(begin_time_code) def push_buffered_to_active_captions(self): """Send the current buffered caption to the active captions list""" - if self.buffered_caption is not None and self.buffered_caption.get_current_text(): + if self.buffered_caption.get_current_text(): if not self.buffered_caption.get_id(): self.count += 1 self.buffered_caption.set_id("caption" + str(self.count)) self.active_caption = self.buffered_caption - self.buffered_caption = None + self.reset_buffered_caption() def flip_buffered_to_active_captions(self, time_code: Optional[SmpteTimeCode] = None): """ @@ -155,7 +156,7 @@ def push_active_caption_to_model(self, time_code: SmpteTimeCode, clear_active_ca previous_caption.set_end(time_code) if clear_active_caption: - self.active_caption = None + self.reset_active_caption() self.div.push_child(previous_caption.to_paragraph(self.div.get_doc())) @@ -182,7 +183,8 @@ def paint_on_active_caption(self, time_code: SmpteTimeCode): self.push_active_caption_to_model(time_code) # Initialize new buffered caption - self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, active_style) + self.reset_active_caption() + self.active_caption.set_caption_style(active_style) self.initialize_active_caption(time_code) if len(copied_lines) > 0: @@ -216,7 +218,10 @@ def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code: if not self.has_active_caption(): # If there is no current active caption, initialize an empty new paragraph - self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.RollUp) + self.reset_active_caption() + self.active_caption.set_caption_style(SccCaptionStyle.RollUp) + + if self.active_caption.get_begin() is None: self.initialize_active_caption(time_code) # Ignore PACs for rows 5-11, but get indent from PACs for rows 1-4 and 12-15. (Roll-Up) @@ -230,15 +235,12 @@ def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code: self.active_caption.new_caption_text() - else: # Pop-On Style - - if self.buffered_caption is None: - self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.PopOn) - + elif self.current_style is SccCaptionStyle.PopOn: # set cursor in paragraph and create line or text if necessary self.buffered_caption.set_cursor_at(pac_row, pac_indent) - self.buffered_caption.new_caption_text() + else: + raise ValueError("SCC caption style not defined") self.current_color = pac.get_color() self.current_font_style = pac.get_font_style() @@ -252,9 +254,6 @@ def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTime processed_caption = self.get_caption_to_process() - if processed_caption is None: - raise ValueError("No current SCC caption initialized") - color = mid_row_code.get_color() font_style = mid_row_code.get_font_style() text_decoration = mid_row_code.get_text_decoration() @@ -326,6 +325,11 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim # Start a new Pop-On caption self.current_style = SccCaptionStyle.PopOn + if self.buffered_caption.get_caption_style() is SccCaptionStyle.Unknown: + self.buffered_caption.set_caption_style(self.current_style) + self.buffered_caption.new_caption_line() + self.buffered_caption.new_caption_text() + elif control_code is SccControlCode.RDC: # Start a new Paint-On caption self.current_style = SccCaptionStyle.PaintOn @@ -343,6 +347,18 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim elif control_code is SccControlCode.RU4: self.roll_up_depth = 4 + if not self.has_active_caption(): + # If there is currently no active caption, initialize an empty new paragraph + self.reset_active_caption() + self.initialize_active_caption(time_code) + + self.active_caption.set_caption_style(SccCaptionStyle.RollUp) + self.active_caption.set_cursor_at(ROLL_UP_BASE_ROW, 0) + self.active_caption.new_caption_line() + self.active_caption.new_caption_text() + + self.active_cursor = self.active_caption.get_cursor() + elif control_code is SccControlCode.EOC: # Display caption (Pop-On) self.set_buffered_caption_begin_time(time_code) @@ -372,7 +388,7 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim elif control_code is SccControlCode.ENM: # Erase buffered caption - self.buffered_caption = None + self.reset_buffered_caption() elif control_code is SccControlCode.TO1: self.get_caption_to_process().indent_cursor(1) @@ -387,19 +403,24 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim # Roll the displayed caption up one row (Roll-Up) if self.has_active_caption(): - # Push active caption to model (but don't erase it) - self.push_active_caption_to_model(time_code, False) - # Roll the active caption up - self.active_caption.roll_up() - # Get the remaining lines to initialize the following caption with the expected depth - previous_lines = self.active_caption.get_last_caption_lines(self.roll_up_depth - 1) + if self.active_caption.get_current_text().is_empty(): + self.count -= 1 + previous_lines = [] + else: + # Push active caption to model (but don't erase it) + self.push_active_caption_to_model(time_code, False) + # Roll the active caption up + self.active_caption.roll_up() + # Get the remaining lines to initialize the following caption with the expected depth + previous_lines = self.active_caption.get_last_caption_lines(self.roll_up_depth - 1) # Initialize the new caption with the previous lines - self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.RollUp) + self.reset_active_caption() self.initialize_active_caption(time_code) + self.active_caption.set_caption_style(SccCaptionStyle.RollUp) self.active_caption.set_lines(previous_lines) - self.active_caption.set_cursor_at(self.active_cursor[0], self.active_cursor[1]) + self.active_caption.set_cursor_at(ROLL_UP_BASE_ROW) elif control_code is SccControlCode.DER: # Delete to End of Row (Paint-On) @@ -471,6 +492,5 @@ def flush(self, time_code: Optional[SmpteTimeCode] = None): if self.has_active_caption(): self.push_active_caption_to_model(time_code) - if self.buffered_caption is not None: - # Remove the buffered caption - self.buffered_caption = None + # Remove the buffered caption + self.reset_buffered_caption() diff --git a/src/main/python/ttconv/scc/reader.py b/src/main/python/ttconv/scc/reader.py index 3ddb4d12..03873d07 100644 --- a/src/main/python/ttconv/scc/reader.py +++ b/src/main/python/ttconv/scc/reader.py @@ -48,15 +48,16 @@ def to_model(scc_content: str, config: Optional[SccReaderConfiguration] = None, progress_callback=lambda _: None): """Converts a SCC document to the data model""" - context = SccContext(config) document = ContentDocument() # Safe area must be a 32x15 grid, that represents 80% of the root area root_cell_resolution = CellResolutionType(rows=SCC_ROOT_CELL_RESOLUTION_ROWS, columns=SCC_ROOT_CELL_RESOLUTION_COLUMNS) document.set_cell_resolution(root_cell_resolution) - context.set_safe_area(int((root_cell_resolution.columns - SCC_SAFE_AREA_CELL_RESOLUTION_COLUMNS) / 2), - int((root_cell_resolution.rows - SCC_SAFE_AREA_CELL_RESOLUTION_ROWS) / 2)) + safe_area_x_offset = int((root_cell_resolution.columns - SCC_SAFE_AREA_CELL_RESOLUTION_COLUMNS) / 2) + safe_area_y_offset = int((root_cell_resolution.rows - SCC_SAFE_AREA_CELL_RESOLUTION_ROWS) / 2) + + context = SccContext(safe_area_x_offset, safe_area_y_offset, config) # The active area is equivalent to the safe area active_area = ActiveAreaType( diff --git a/src/test/python/test_scc_reader.py b/src/test/python/test_scc_reader.py index 5eb25eb0..cf708d40 100644 --- a/src/test/python/test_scc_reader.py +++ b/src/test/python/test_scc_reader.py @@ -27,6 +27,7 @@ # pylint: disable=R0201,C0115,C0116,W0212 import unittest +from fractions import Fraction from numbers import Number from typing import Union, Type, Optional @@ -50,10 +51,10 @@ class SccReaderTest(unittest.TestCase): def check_caption(self, paragraph: P, caption_id: str, begin: str, end: Optional[str], *children): self.assertEqual(caption_id, paragraph.get_id()) - self.assertEqual(SmpteTimeCode.parse(begin, FPS_30).to_temporal_offset(), paragraph.get_begin()) + self.check_element_timecode(paragraph.get_begin(), begin) if end is not None: - self.assertEqual(SmpteTimeCode.parse(end, FPS_30).to_temporal_offset(), paragraph.get_end()) + self.check_element_timecode(paragraph.get_end(), end) p_children = list(paragraph) self.assertEqual(len(children), len(p_children)) @@ -66,6 +67,9 @@ def check_caption(self, paragraph: P, caption_id: str, begin: str, end: Optional else: self.assertEqual(expected_child, Br) + def check_element_timecode(self, timecode: Fraction, expected_timecode: str): + self.assertEqual(SmpteTimeCode.parse(expected_timecode, FPS_30).to_temporal_offset(), timecode) + def check_element_style(self, elem: ContentElement, style_property: Type[StyleProperty], expected_value): self.assertEqual(expected_value, elem.get_style(style_property)) @@ -250,6 +254,62 @@ def test_scc_pop_on_content_unexpectedly_ended(self): "consectetur adipiscing elit.") self.assertEqual(region_1, p_list[0].get_region()) + def test_scc_pop_on_content_without_preamble_address_code(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:02:16 942c + +00:00:03:01 9420 91ae 9421 4c6f 7265 6d20 6970 7375 6d20 9220 942c 942f + +00:00:07:29 9420 94D0 646f 6c6f 7220 7369 7420 616d 6574 2c80 9470 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e 942c 942f + +00:00:09:07 9420 656e 7465 7371 7565 2069 6e74 6572 6475 6d20 6c61 6369 6e69 6120 736f 6c6c 6963 6974 7564 696e 2e80 942c 942f + +00:00:11:27 9420 +""" + + scc_disassembly = """\ +00:00:02:16 {EDM} +00:00:03:01 {RCL}{I}{BS}Lorem ipsum Á{EDM}{EOC} +00:00:07:29 {RCL}{1400}dolor sit amet,{1500}consectetur adipiscing elit.{EDM}{EOC} +00:00:09:07 {RCL}entesque interdum lacinia sollicitudin.{EDM}{EOC} +00:00:11:27 {RCL} +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 1, doc.get_cell_resolution()) + self.check_region_extent(region_1, 32, 16, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(3, len(p_list)) + + self.check_caption(p_list[0], "caption1", "00:00:03:13", "00:00:08:26", "Lorem ipsumÁ") + self.assertEqual(region_1, p_list[0].get_region()) + + self.check_caption(p_list[1], "caption2", "00:00:08:26", "00:00:10:00", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_1, p_list[0].get_region()) + + self.check_caption(p_list[2], "caption3", "00:00:10:00", None, "entesque interdum lacinia sollicitudin.") + self.assertEqual(region_1, p_list[0].get_region()) + def test_scc_double_word_in_content(self): scc_content = """"Scenarist_SCC V1.0 01:02:53:14 9420 9420 94AE 94AE 9452 9452 97A1 97A1 20F2 E56D E56D 62E5 F220 9137 9137 9137 9137 942F 942F @@ -324,7 +384,7 @@ def test_2_rows_roll_up_content(self): expected_text = LOREM_IPSUM.splitlines() - self.check_caption(p_list[0], "caption1", "00:00:00:25", "00:00:02:25", expected_text[0]) + self.check_caption(p_list[0], "caption1", "00:00:00:24", "00:00:02:25", expected_text[0]) self.assertEqual(region_1, p_list[0].get_region()) self.check_caption(p_list[1], "caption2", "00:00:02:25", "00:00:04:19", expected_text[0], Br, expected_text[1]) @@ -391,7 +451,7 @@ def test_3_rows_roll_up_content(self): expected_text = LOREM_IPSUM.splitlines() - self.check_caption(p_list[0], "caption1", "00:00:17;04", "00:00:18;21", expected_text[0]) + self.check_caption(p_list[0], "caption1", "00:00:17;03", "00:00:18;21", expected_text[0]) self.assertEqual(region_1, p_list[0].get_region()) self.check_caption(p_list[1], "caption2", "00:00:18;21", "00:00:20;08", expected_text[0], Br, expected_text[1]) @@ -460,7 +520,7 @@ def test_4_rows_roll_up_content(self): expected_text = LOREM_IPSUM.splitlines() - self.check_caption(p_list[0], "caption1", "00:00:35;00", "00:00:36;14", expected_text[0]) + self.check_caption(p_list[0], "caption1", "00:00:34;29", "00:00:36;14", expected_text[0]) self.assertEqual(region_1, p_list[0].get_region()) self.check_caption(p_list[1], "caption2", "00:00:36;14", "00:00:44;10", expected_text[0], Br, expected_text[1]) @@ -566,7 +626,7 @@ def test_mix_rows_roll_up_content(self): p_list = list(div) self.assertEqual(16, len(p_list)) - self.check_caption(p_list[0], "caption1", "00:00:00;25", "00:00:02;25", ">>> HI.") + self.check_caption(p_list[0], "caption1", "00:00:00;24", "00:00:02;25", ">>> HI.") self.assertEqual(region_1, p_list[0].get_region()) self.check_caption(p_list[1], "caption2", "00:00:02;25", "00:00:04;19", ">>> HI.", Br, "I'M KEVIN CUNNING AND AT") @@ -639,6 +699,63 @@ def test_mix_rows_roll_up_content(self): if span.get_style(StyleProperties.BackgroundColor) != SccAttributeCode.BMS.value[2]: self.check_element_style(span, StyleProperties.BackgroundColor, NamedColors.black.value) + def test_scc_roll_up_content_without_preamble_address_code(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:34:27 9425 94ad 4c6f 7265 6d20 6970 7375 6d20 646f 6c6f 7220 7369 7420 616d 6574 2c80 + +00:00:36:12 9425 94ad 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e + +00:00:44:08 9425 94ad 5065 6c6c 656e 7465 7371 7565 2069 6e74 6572 6475 6d20 6c61 6369 6e69 6120 736f 6c6c 6963 6974 7564 696e 2e80 + +""" + + scc_disassembly = """\ +00:00:34:27 {RU2}{CR}Lorem ipsum dolor sit amet, +00:00:36:12 {RU2}{CR}consectetur adipiscing elit. +00:00:44:08 {RU2}{CR}Pellentesque interdum lacinia sollicitudin. +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("rollup1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 2, doc.get_cell_resolution()) + self.check_region_extent(region_1, 32, 15, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.after) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(3, len(p_list)) + + expected_text = LOREM_IPSUM.splitlines() + + self.check_caption(p_list[0], "caption1", "00:00:34:29", "00:00:36:14", expected_text[0]) + self.assertEqual(region_1, p_list[0].get_region()) + + self.check_caption(p_list[1], "caption2", "00:00:36:14", "00:00:44:10", expected_text[0], Br, expected_text[1]) + self.assertEqual(region_1, p_list[1].get_region()) + + self.check_caption(p_list[2], "caption3", "00:00:44:10", None, expected_text[1], Br, + expected_text[2]) + self.assertEqual(region_1, p_list[2].get_region()) + + for p in p_list: + for span in [elem for elem in list(p) if isinstance(elem, Span)]: + self.check_element_style(span, StyleProperties.BackgroundColor, NamedColors.black.value) + def test_scc_paint_on_content(self): scc_content = """\ Scenarist_SCC V1.0 @@ -739,6 +856,83 @@ def test_scc_paint_on_content(self): for span in [elem for elem in list(p) if isinstance(elem, Span)]: self.check_element_style(span, StyleProperties.BackgroundColor, NamedColors.black.value) + def test_scc_paint_on_content_without_preamble_address_codes(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:02:53:14 9429 4c6f 7265 6d20 6970 7375 6d20 646f 6c6f 7220 7369 7420 616d 6574 2c80 + +00:02:56:00 9429 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e + +00:02:56:25 9429 5065 6c6c 656e 7465 7371 7565 2069 6e74 6572 6475 6d20 95d0 6c61 6369 6e69 6120 736f 6c6c 6963 6974 7564 696e 2e80 + +""" + + scc_disassembly = """\ +00:02:53:14 {RDC}Lorem ipsum dolor sit amet, +00:02:56:00 {RDC}consectetur adipiscing elit. +00:02:56:25 {RDC}Pellentesque interdum {0500}lacinia sollicitudin. +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("paint1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 1, doc.get_cell_resolution()) + self.check_region_extent(region_1, 32, 16, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(2, len(p_list)) + + paragraph = p_list[0] + self.check_caption(paragraph, "caption1", "00:02:53:16", "00:02:57:08", "Lorem ", "ipsum ", "dolor ", "sit ", + "amet,consectetur ", "adipiscing", " elit.Pellentesque", " interdum ") + self.assertEqual(region_1, paragraph.get_region()) + + self.assertIsNone(list(paragraph)[0].get_begin()) + self.check_element_timecode(list(paragraph)[1].get_begin(), "00:00:00:02") + self.check_element_timecode(list(paragraph)[2].get_begin(), "00:00:00:05") + self.check_element_timecode(list(paragraph)[3].get_begin(), "00:00:00:08") + self.check_element_timecode(list(paragraph)[4].get_begin(), "00:00:00:10") + self.check_element_timecode(list(paragraph)[5].get_begin(), "00:00:02:21") + self.check_element_timecode(list(paragraph)[6].get_begin(), "00:00:02:27") + self.check_element_timecode(list(paragraph)[7].get_begin(), "00:00:03:17") + + paragraph = p_list[1] + + self.check_caption(paragraph, "caption2", "00:02:57:08", None, "Lorem ", "ipsum ", "dolor ", "sit ", + "amet,consectetur ", "adipiscing", " elit.Pellentesque", " interdum ", Br, Br, Br, Br, Br, "lacinia ", + "sollicitudin.") + self.assertEqual(region_1, paragraph.get_region()) + + self.assertIsNone(list(paragraph)[0].get_begin()) + self.assertIsNone(list(paragraph)[1].get_begin()) + self.assertIsNone(list(paragraph)[2].get_begin()) + self.assertIsNone(list(paragraph)[3].get_begin()) + self.assertIsNone(list(paragraph)[4].get_begin()) + self.assertIsNone(list(paragraph)[6].get_begin()) + self.assertIsNone(list(paragraph)[7].get_begin()) + self.assertIsNone(list(paragraph)[13].get_begin()) + self.check_element_timecode(list(paragraph)[14].get_begin(), "00:00:00:04") + + for p in p_list: + for span in [elem for elem in list(p) if isinstance(elem, Span)]: + self.check_element_style(span, StyleProperties.BackgroundColor, NamedColors.black.value) + def test_scc_mid_row_erase_displayed_memory_control_code(self): scc_content = """\ Scenarist_SCC V1.0 From e3c86de227956648a7b37198ae6145d2c955acc7 Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Wed, 16 Aug 2023 11:16:22 +0200 Subject: [PATCH 09/13] SCC: active caption initialization refactoring --- src/main/python/ttconv/scc/context.py | 77 +++++++++++---------------- src/test/python/test_scc_reader.py | 27 ---------- 2 files changed, 30 insertions(+), 74 deletions(-) diff --git a/src/main/python/ttconv/scc/context.py b/src/main/python/ttconv/scc/context.py index c46d10ed..ae1a756c 100644 --- a/src/main/python/ttconv/scc/context.py +++ b/src/main/python/ttconv/scc/context.py @@ -64,9 +64,9 @@ def __init__(self, safe_area_x_offset: int, safe_area_y_offset: int, config: Opt self.previous_word_type: Optional[Type] = None # Buffered caption being built - self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) + self.buffered_caption = None # Captions being displayed - self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) + self.active_caption: Optional[SccCaptionParagraph] = None # Caption style (Pop-on, Roll-up, Paint-on) currently processed self.current_style = SccCaptionStyle.Unknown @@ -83,11 +83,16 @@ def __init__(self, safe_area_x_offset: int, safe_area_y_offset: int, config: Opt # Text alignment self.text_alignment = TextAlignment.AUTO if config is None else config.text_align - def reset_active_caption(self): - """Resets caption being displayed""" - self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) + self.new_buffered_caption() + + def new_active_caption(self, begin_time_code: SmpteTimeCode, caption_style: SccCaptionStyle = SccCaptionStyle.Unknown): + """Initializes a new caption being displayed""" + self.active_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, caption_style) + self.count += 1 + self.active_caption.set_id("caption" + str(self.count)) + self.active_caption.set_begin(begin_time_code) - def reset_buffered_caption(self): + def new_buffered_caption(self): """Resets buffered caption""" self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) @@ -103,29 +108,8 @@ def get_caption_to_process(self): def has_active_caption(self) -> bool: """Returns whether captions are being displayed or not""" - return self.active_caption.get_begin() is not None - - def set_buffered_caption_begin_time(self, time_code: SmpteTimeCode): - """Initializes the current buffered caption with begin time""" - self.buffered_caption.set_begin(time_code) - - def initialize_active_caption(self, begin_time_code: SmpteTimeCode): - """Initializes the current active caption with id and begin time""" - if not self.active_caption.get_id(): - self.count += 1 - self.active_caption.set_id("caption" + str(self.count)) - - self.active_caption.set_begin(begin_time_code) - - def push_buffered_to_active_captions(self): - """Send the current buffered caption to the active captions list""" - if self.buffered_caption.get_current_text(): - if not self.buffered_caption.get_id(): - self.count += 1 - self.buffered_caption.set_id("caption" + str(self.count)) - - self.active_caption = self.buffered_caption - self.reset_buffered_caption() + # return self.active_caption.get_begin() is not None + return self.active_caption is not None def flip_buffered_to_active_captions(self, time_code: Optional[SmpteTimeCode] = None): """ @@ -139,10 +123,15 @@ def flip_buffered_to_active_captions(self, time_code: Optional[SmpteTimeCode] = if time_code is not None: # End of display of active captions - if self.has_active_caption(): - self.push_active_caption_to_model(time_code) + self.push_active_caption_to_model(time_code) - self.push_buffered_to_active_captions() + # Send the current buffered caption to the active captions list + if not self.buffered_caption.get_id(): + self.count += 1 + self.buffered_caption.set_id("caption" + str(self.count)) + + self.active_caption = self.buffered_caption + self.new_buffered_caption() if temporary_caption is not None: self.buffered_caption = temporary_caption @@ -156,7 +145,7 @@ def push_active_caption_to_model(self, time_code: SmpteTimeCode, clear_active_ca previous_caption.set_end(time_code) if clear_active_caption: - self.reset_active_caption() + self.active_caption = None self.div.push_child(previous_caption.to_paragraph(self.div.get_doc())) @@ -183,9 +172,7 @@ def paint_on_active_caption(self, time_code: SmpteTimeCode): self.push_active_caption_to_model(time_code) # Initialize new buffered caption - self.reset_active_caption() - self.active_caption.set_caption_style(active_style) - self.initialize_active_caption(time_code) + self.new_active_caption(time_code, active_style) if len(copied_lines) > 0: # Set remaining lines to the new buffered caption @@ -218,11 +205,10 @@ def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code: if not self.has_active_caption(): # If there is no current active caption, initialize an empty new paragraph - self.reset_active_caption() - self.active_caption.set_caption_style(SccCaptionStyle.RollUp) + self.new_active_caption(time_code, SccCaptionStyle.RollUp) if self.active_caption.get_begin() is None: - self.initialize_active_caption(time_code) + self.active_caption.set_begin(time_code) # Ignore PACs for rows 5-11, but get indent from PACs for rows 1-4 and 12-15. (Roll-Up) if pac_row in range(5, 12): @@ -349,8 +335,7 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim if not self.has_active_caption(): # If there is currently no active caption, initialize an empty new paragraph - self.reset_active_caption() - self.initialize_active_caption(time_code) + self.new_active_caption(time_code, SccCaptionStyle.RollUp) self.active_caption.set_caption_style(SccCaptionStyle.RollUp) self.active_caption.set_cursor_at(ROLL_UP_BASE_ROW, 0) @@ -361,7 +346,7 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim elif control_code is SccControlCode.EOC: # Display caption (Pop-On) - self.set_buffered_caption_begin_time(time_code) + self.buffered_caption.set_begin(time_code) self.flip_buffered_to_active_captions(time_code) if self.has_active_caption(): @@ -388,7 +373,7 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim elif control_code is SccControlCode.ENM: # Erase buffered caption - self.reset_buffered_caption() + self.new_buffered_caption() elif control_code is SccControlCode.TO1: self.get_caption_to_process().indent_cursor(1) @@ -415,9 +400,7 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim previous_lines = self.active_caption.get_last_caption_lines(self.roll_up_depth - 1) # Initialize the new caption with the previous lines - self.reset_active_caption() - self.initialize_active_caption(time_code) - self.active_caption.set_caption_style(SccCaptionStyle.RollUp) + self.new_active_caption(time_code, SccCaptionStyle.RollUp) self.active_caption.set_lines(previous_lines) self.active_caption.set_cursor_at(ROLL_UP_BASE_ROW) @@ -493,4 +476,4 @@ def flush(self, time_code: Optional[SmpteTimeCode] = None): self.push_active_caption_to_model(time_code) # Remove the buffered caption - self.reset_buffered_caption() + self.new_buffered_caption() diff --git a/src/test/python/test_scc_reader.py b/src/test/python/test_scc_reader.py index cf708d40..f45c7a7e 100644 --- a/src/test/python/test_scc_reader.py +++ b/src/test/python/test_scc_reader.py @@ -1016,33 +1016,6 @@ def test_scc_mid_row_erase_displayed_memory_control_code(self): self.check_caption(p_list[5], "caption6", "00:00:03:19", None, "sagittis.") self.assertEqual(region_4, p_list[5].get_region()) - def test_scc_double_word_in_content(self): - scc_content = """"Scenarist_SCC V1.0 -01:02:53:14 9420 9420 94AE 94AE 9452 9452 97A1 97A1 20F2 E56D E56D 62E5 F220 9137 9137 9137 9137 942F 942F -01:02:55:14 942c 942c -""" - scc_disassembly = """\ -01:02:53:14 {RCL}{RCL}{ENM}{ENM}{1404}{1404}{TO1}{TO1} remember ♪♪♪♪{EOC}{EOC} -""" - self.assertEqual(scc_disassembly, to_disassembly(scc_content)) - - doc = to_model(scc_content) - self.assertIsNotNone(doc) - body = doc.get_body() - self.assertIsNotNone(body) - - div_list = list(body) - self.assertEqual(1, len(div_list)) - div = div_list[0] - self.assertIsNotNone(div) - - p_list = list(div) - self.assertEqual(1, len(p_list)) - - first_span = p_list[0][0] - first_text = first_span[0].get_text() - - self.assertEqual(" remember ♪♪", first_text) if __name__ == '__main__': unittest.main() From 934c73edb011e540f80e34cb616df588fd0e34df Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Mon, 4 Sep 2023 11:05:22 +0200 Subject: [PATCH 10/13] SCC: allow starting parsing from the middle of a caption Skip until a caption style is specified --- .../python/ttconv/scc/caption_paragraph.py | 7 + src/main/python/ttconv/scc/context.py | 34 ++-- src/test/python/test_scc_reader.py | 175 ++++++++++++++++++ 3 files changed, 204 insertions(+), 12 deletions(-) diff --git a/src/main/python/ttconv/scc/caption_paragraph.py b/src/main/python/ttconv/scc/caption_paragraph.py index 9452296a..6b30eabd 100644 --- a/src/main/python/ttconv/scc/caption_paragraph.py +++ b/src/main/python/ttconv/scc/caption_paragraph.py @@ -187,6 +187,10 @@ def get_lines(self) -> Dict[int, SccCaptionLine]: """Returns the paragraph lines per row""" return self._caption_lines + def is_empty(self) -> bool: + """Returns whether the paragraph has no content""" + return not self._caption_lines + def copy_lines(self) -> Dict[int, SccCaptionLine]: """Copy paragraph lines (without time attributes)""" lines_copy = {} @@ -265,6 +269,9 @@ def guess_text_alignment(self) -> TextAlignType: def get_line_right_offset(line: SccCaptionLine) -> int: return SCC_ROOT_CELL_RESOLUTION_COLUMNS - (line.get_indent() + line.get_length()) + if self.is_empty(): + return TextAlignType.start + # look for longest line longest_line = max(self._caption_lines.values(), key=lambda line: line.get_length()) diff --git a/src/main/python/ttconv/scc/context.py b/src/main/python/ttconv/scc/context.py index ae1a756c..d2db5237 100644 --- a/src/main/python/ttconv/scc/context.py +++ b/src/main/python/ttconv/scc/context.py @@ -28,6 +28,7 @@ from __future__ import annotations import copy +import logging from typing import Optional, Type, Tuple from ttconv.model import Div @@ -44,6 +45,8 @@ ROLL_UP_BASE_ROW = 15 +LOGGER = logging.getLogger(__name__) + class SccContext: """SCC context for reader""" @@ -96,7 +99,7 @@ def new_buffered_caption(self): """Resets buffered caption""" self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) - def get_caption_to_process(self): + def get_caption_to_process(self) -> Optional[SccCaptionParagraph]: """Returns the caption currently being processed""" if self.current_style in (SccCaptionStyle.PaintOn, SccCaptionStyle.RollUp): # If the Paint-On or Roll-Up style is activated, write directly on active caption @@ -104,7 +107,9 @@ def get_caption_to_process(self): if self.current_style is SccCaptionStyle.PopOn: # For Pop-On style, write first on a buffered caption return self.buffered_caption - raise ValueError("SCC caption style not defined") + + LOGGER.warning("SCC caption style not defined") + return None def has_active_caption(self) -> bool: """Returns whether captions are being displayed or not""" @@ -147,7 +152,8 @@ def push_active_caption_to_model(self, time_code: SmpteTimeCode, clear_active_ca if clear_active_caption: self.active_caption = None - self.div.push_child(previous_caption.to_paragraph(self.div.get_doc())) + if not previous_caption.is_empty(): + self.div.push_child(previous_caption.to_paragraph(self.div.get_doc())) def backspace(self): """Move the cursors in a column to the left""" @@ -226,7 +232,7 @@ def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code: self.buffered_caption.set_cursor_at(pac_row, pac_indent) else: - raise ValueError("SCC caption style not defined") + LOGGER.warning("SCC caption style not defined") self.current_color = pac.get_color() self.current_font_style = pac.get_font_style() @@ -248,8 +254,9 @@ def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTime # In case of multiple mid-row codes, move right only after the first code # If there is already text on the current line - if processed_caption.get_current_text() is not None \ - and processed_caption.get_current_text().get_text() != "": + if processed_caption is not None \ + and processed_caption.get_current_text() is not None \ + and not processed_caption.get_current_text().is_empty(): # In case of paint-on replacing text if self.current_style is SccCaptionStyle.PaintOn \ @@ -264,7 +271,7 @@ def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTime processed_caption.append_text(" ") processed_caption.new_caption_text() - else: + elif processed_caption is not None: processed_caption.append_text(" ") self.current_color = color @@ -279,10 +286,12 @@ def process_mid_row_code(self, mid_row_code: SccMidRowCode, time_code: SmpteTime if text_decoration is not None: self.current_text_decoration = text_decoration - processed_caption.append_text(" ") - processed_caption.new_caption_text() + if processed_caption is not None: + processed_caption.append_text(" ") + processed_caption.new_caption_text() - if processed_caption.get_caption_style() is SccCaptionStyle.PaintOn: + if processed_caption is not None \ + and processed_caption.get_caption_style() is SccCaptionStyle.PaintOn: processed_caption.get_current_text().set_begin(time_code) def process_attribute_code(self, attribute_code: SccAttributeCode): @@ -291,7 +300,8 @@ def process_attribute_code(self, attribute_code: SccAttributeCode): processed_caption = self.get_caption_to_process() if processed_caption is None or processed_caption.get_current_text() is None: - raise ValueError("No current SCC caption nor content initialized") + LOGGER.warning("No current SCC caption nor content initialized") + return if processed_caption.get_current_text() is not None and processed_caption.get_current_text().get_text(): processed_caption.new_caption_text() @@ -460,7 +470,7 @@ def process_text(self, word: str, time_code: SmpteTimeCode): self.active_caption.get_current_text().add_style_property(StyleProperties.FontStyle, self.current_font_style) self.active_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration) - else: + elif self.current_style is SccCaptionStyle.PopOn: self.buffered_caption.append_text(word) self.buffered_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color) diff --git a/src/test/python/test_scc_reader.py b/src/test/python/test_scc_reader.py index f45c7a7e..74fa312a 100644 --- a/src/test/python/test_scc_reader.py +++ b/src/test/python/test_scc_reader.py @@ -1016,6 +1016,181 @@ def test_scc_mid_row_erase_displayed_memory_control_code(self): self.check_caption(p_list[5], "caption6", "00:00:03:19", None, "sagittis.") self.assertEqual(region_4, p_list[5].get_region()) + def test_scc_content_parsing_from_text(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:03:01 6970 7375 6d00 942c 942f + +00:00:07:29 9420 94D0 646f 6c6f 7220 7369 7420 616d 6574 2c80 9470 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e 942c 942f + +00:00:09:07 942c +""" + + scc_disassembly = """\ +00:00:03:01 ipsum{EDM}{EOC} +00:00:07:29 {RCL}{1400}dolor sit amet,{1500}consectetur adipiscing elit.{EDM}{EOC} +00:00:09:07 {EDM} +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_1, p_list[0].get_region()) + + def test_scc_content_parsing_from_mid_row_code(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:03:01 91ae 6970 7375 6d00 942c 942f + +00:00:07:29 9420 94D0 646f 6c6f 7220 7369 7420 616d 6574 2c80 9470 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e 942c 942f + +00:00:09:07 942c +""" + + scc_disassembly = """\ +00:00:03:01 {I}ipsum{EDM}{EOC} +00:00:07:29 {RCL}{1400}dolor sit amet,{1500}consectetur adipiscing elit.{EDM}{EOC} +00:00:09:07 {EDM} +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_1, p_list[0].get_region()) + + def test_scc_content_parsing_from_control_code(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:03:01 942c 6970 7375 6d00 942c 942f + +00:00:07:29 9420 94D0 646f 6c6f 7220 7369 7420 616d 6574 2c80 9470 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e 942c 942f + +00:00:09:07 942c +""" + + scc_disassembly = """\ +00:00:03:01 {EDM}ipsum{EDM}{EOC} +00:00:07:29 {RCL}{1400}dolor sit amet,{1500}consectetur adipiscing elit.{EDM}{EOC} +00:00:09:07 {EDM} +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_1, p_list[0].get_region()) + + def test_scc_content_parsing_from_preamble_address_code(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:03:01 9370 6970 7375 6d00 942c 942f + +00:00:07:29 9420 94D0 646f 6c6f 7220 7369 7420 616d 6574 2c80 9470 636f 6e73 6563 7465 7475 7220 6164 6970 6973 6369 6e67 2065 6c69 742e 942c 942f + +00:00:09:07 942c +""" + + scc_disassembly = """\ +00:00:03:01 {1300}ipsum{EDM}{EOC} +00:00:07:29 {RCL}{1400}dolor sit amet,{1500}consectetur adipiscing elit.{EDM}{EOC} +00:00:09:07 {EDM} +""" + + self.assertEqual(scc_disassembly, to_disassembly(scc_content)) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_1, p_list[0].get_region()) if __name__ == '__main__': unittest.main() From c8a7459d70582d4ef1bd69da9a340ae46325f462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20NO=C3=8BL?= Date: Tue, 10 Oct 2023 17:14:43 +0200 Subject: [PATCH 11/13] SCC: supports left-hand cursor movements on current line --- src/main/python/ttconv/scc/caption_line.py | 18 ++++++--- .../python/ttconv/scc/caption_paragraph.py | 13 ++++++- src/main/python/ttconv/scc/caption_text.py | 5 +++ src/test/python/test_scc_reader.py | 38 +++++++++++++++++++ 4 files changed, 66 insertions(+), 8 deletions(-) diff --git a/src/main/python/ttconv/scc/caption_line.py b/src/main/python/ttconv/scc/caption_line.py index 10b40aa8..20e89991 100644 --- a/src/main/python/ttconv/scc/caption_line.py +++ b/src/main/python/ttconv/scc/caption_line.py @@ -58,9 +58,9 @@ def add_text(self, text: Union[SccCaptionText, str]): if self._current_text is None: # Initialize a new text element if necessary - self._texts.append(SccCaptionText(text)) + self._texts.append(SccCaptionText()) self._current_text = self._texts[-1] - self._cursor = self._current_text.get_length() + self._append_text(text) else: remaining_text = text @@ -71,8 +71,7 @@ def add_text(self, text: Union[SccCaptionText, str]): text_to_write = remaining_text[:available] # Replace current text element content - self._current_text.append(text_to_write) - self.set_cursor(self._cursor + len(text_to_write)) + self._append_text(text_to_write) remaining_text = remaining_text[available:] # If some text remains on the last text element @@ -80,12 +79,19 @@ def add_text(self, text: Union[SccCaptionText, str]): assert self._current_text is self._texts[-1] # Replace and append to current text element content - self._current_text.append(remaining_text) - self.set_cursor(self._cursor + len(remaining_text)) + self._append_text(remaining_text) else: raise ValueError("Unsupported text type for SCC caption line") + def _append_text(self, text: str): + """Appends text and update cursor position""" + self._current_text.append(text) + if self._cursor < 0: + self._cursor = 0 + + self.set_cursor(self._cursor + len(text)) + def indent(self, indent: int): """Indent current line""" self._indent += indent diff --git a/src/main/python/ttconv/scc/caption_paragraph.py b/src/main/python/ttconv/scc/caption_paragraph.py index 6b30eabd..84b971c7 100644 --- a/src/main/python/ttconv/scc/caption_paragraph.py +++ b/src/main/python/ttconv/scc/caption_paragraph.py @@ -167,7 +167,7 @@ def set_cursor_at(self, row: int, indent: Optional[int] = None): self._current_line = self._caption_lines.get(row) if indent is not None: - self._current_line.set_cursor(self._cursor[1] - self._current_line.get_indent()) + self._update_current_line_cursor() def get_cursor(self) -> (int, int): """Returns cursor coordinates""" @@ -181,7 +181,16 @@ def indent_cursor(self, indent: int): # If the current line is empty, set cursor indent as a line tabulation self._current_line.indent(indent) else: - self._current_line.set_cursor(self._cursor[1] - self._current_line.get_indent()) + self._update_current_line_cursor() + + def _update_current_line_cursor(self): + """Updates cursor position on current line""" + new_cursor_position = self._cursor[1] - self._current_line.get_indent() + + if new_cursor_position < 0: + self._current_line.indent(new_cursor_position) + + self._current_line.set_cursor(new_cursor_position) def get_lines(self) -> Dict[int, SccCaptionLine]: """Returns the paragraph lines per row""" diff --git a/src/main/python/ttconv/scc/caption_text.py b/src/main/python/ttconv/scc/caption_text.py index 886f1430..17ebcc0b 100644 --- a/src/main/python/ttconv/scc/caption_text.py +++ b/src/main/python/ttconv/scc/caption_text.py @@ -79,6 +79,11 @@ def is_empty(self) -> bool: def append(self, text: str): """Add or replace text content at cursor position""" + if self._cursor < 0: + # Insert space characters before current text + self._text = ' ' * -self._cursor + self._text + self._cursor = 0 + # print("Append text: ", text, "to", self._text, "at", self._cursor) self._text = self._text[:self._cursor] + text + self._text[(self._cursor + len(text)):] self._cursor += len(text) diff --git a/src/test/python/test_scc_reader.py b/src/test/python/test_scc_reader.py index 74fa312a..6563b905 100644 --- a/src/test/python/test_scc_reader.py +++ b/src/test/python/test_scc_reader.py @@ -1192,5 +1192,43 @@ def test_scc_content_parsing_from_preamble_address_code(self): "consectetur adipiscing elit.") self.assertEqual(region_1, p_list[0].get_region()) + def test_scc_with_negative_cursor(self): + scc_content = """Scenarist_SCC V1.0 +00:00:01:00 94AE 94AE 9420 9420 94F8 94F8 45E5 E5E3 68A1 94F4 94F4 D3E3 61F2 79A1 942C 942C 942F 942F +00:00:02:00 942F 942F +""" + scc_disassembly_expected = """\ +00:00:01:00 {ENM}{ENM}{RCL}{RCL}{1516}{1516}Eeech!{1508}{1508}Scary!{EDM}{EDM}{EOC}{EOC} +00:00:02:00 {EOC}{EOC} +""" + scc_disassembly = to_disassembly(scc_content) + self.assertEqual(scc_disassembly_expected, scc_disassembly) + + doc = to_model(scc_content) + + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 12, 16, doc.get_cell_resolution()) + self.check_region_extent(region_1, 14, 1, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption1", "00:00:01:12", "00:00:02:01", "Scary! Eeech!") + self.assertEqual(region_1, p_list[0].get_region()) + + if __name__ == '__main__': unittest.main() From 01ff7de524a7047241baaf93aacf0f2dfb66a14e Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Thu, 12 Oct 2023 15:13:51 +0200 Subject: [PATCH 12/13] SCC: make reader more resilient to malformed SCC content Paragraphs have PopOn style and empty text line by default --- src/main/python/ttconv/scc/caption_line.py | 63 ++-- .../python/ttconv/scc/caption_paragraph.py | 38 ++- src/main/python/ttconv/scc/caption_style.py | 5 + src/main/python/ttconv/scc/context.py | 31 +- src/main/python/ttconv/scc/disassembly.py | 68 ++++ src/main/python/ttconv/scc/line.py | 60 +--- src/test/python/test_scc_content.py | 8 +- src/test/python/test_scc_paragraph.py | 6 +- src/test/python/test_scc_reader.py | 309 ++++++++++++++++-- 9 files changed, 438 insertions(+), 150 deletions(-) diff --git a/src/main/python/ttconv/scc/caption_line.py b/src/main/python/ttconv/scc/caption_line.py index 20e89991..d144f7d5 100644 --- a/src/main/python/ttconv/scc/caption_line.py +++ b/src/main/python/ttconv/scc/caption_line.py @@ -28,7 +28,7 @@ from __future__ import annotations import logging -from typing import Optional, List, Union +from typing import List, Union from ttconv.scc.caption_text import SccCaptionText @@ -38,13 +38,18 @@ class SccCaptionLine: """Caption paragraph line""" + @staticmethod + def default(): + """Initializes a default caption paragraph line""" + return SccCaptionLine(0, 0) + def __init__(self, row: int, indent: int): - self._texts: List[SccCaptionText] = [] self._row: int = row # Row in the active area self._indent: int = indent # Indentation in the active area self._cursor: int = 0 # Position of the cursor on the line - self._current_text: Optional[SccCaptionText] = None # Text content where the cursor is + self._current_text: SccCaptionText = SccCaptionText() # Text content where the cursor is + self._texts: List[SccCaptionText] = [self._current_text] def add_text(self, text: Union[SccCaptionText, str]): """Add text to line""" @@ -55,31 +60,23 @@ def add_text(self, text: Union[SccCaptionText, str]): self._cursor = self.get_length() elif isinstance(text, str): + remaining_text = text - if self._current_text is None: - # Initialize a new text element if necessary - self._texts.append(SccCaptionText()) - self._current_text = self._texts[-1] - self._append_text(text) - - else: - remaining_text = text - - # While the cursor is not on the last text element, and some text remains - while self._current_text is not self._texts[-1] and len(remaining_text) > 0: - available = self._current_text.get_length() - self._current_text.get_cursor() - text_to_write = remaining_text[:available] + # While the cursor is not on the last text element, and some text remains + while self._current_text is not self._texts[-1] and len(remaining_text) > 0: + available = self._current_text.get_length() - self._current_text.get_cursor() + text_to_write = remaining_text[:available] - # Replace current text element content - self._append_text(text_to_write) - remaining_text = remaining_text[available:] + # Replace current text element content + self._append_text(text_to_write) + remaining_text = remaining_text[available:] - # If some text remains on the last text element - if len(remaining_text) > 0: - assert self._current_text is self._texts[-1] + # If some text remains on the last text element + if len(remaining_text) > 0: + assert self._current_text is self._texts[-1] - # Replace and append to current text element content - self._append_text(remaining_text) + # Replace and append to current text element content + self._append_text(remaining_text) else: raise ValueError("Unsupported text type for SCC caption line") @@ -96,7 +93,7 @@ def indent(self, indent: int): """Indent current line""" self._indent += indent - def get_current_text(self) -> Optional[SccCaptionText]: + def get_current_text(self) -> SccCaptionText: """Returns current text content""" return self._current_text @@ -145,24 +142,26 @@ def get_indent(self) -> int: def clear(self): """Clears the line text contents""" self._texts.clear() - self._current_text = None + self._current_text = SccCaptionText() + self._texts = [self._current_text] self.set_cursor(0) def is_empty(self) -> bool: """Returns whether the line text is empty or not""" - # no caption texts or an empty text - return len(self._texts) == 0 or (len(self._texts) == 1 and self._texts[-1].get_text() == "") + return self.get_length() == 0 def get_leading_spaces(self) -> int: """Returns the number of leading space characters of the line""" index = 0 leading_spaces = 0 - first_text = self.get_texts()[index].get_text() - while first_text.isspace() and index < len(self.get_texts()): - leading_spaces += len(first_text) - index += 1 + while index < len(self.get_texts()): first_text = self.get_texts()[index].get_text() + if first_text.isspace(): + leading_spaces += len(first_text) + index += 1 + else: + break return leading_spaces + len(first_text) - len(first_text.lstrip()) diff --git a/src/main/python/ttconv/scc/caption_paragraph.py b/src/main/python/ttconv/scc/caption_paragraph.py index 84b971c7..e4be5d42 100644 --- a/src/main/python/ttconv/scc/caption_paragraph.py +++ b/src/main/python/ttconv/scc/caption_paragraph.py @@ -52,6 +52,11 @@ class SccCaptionParagraph: """Caption paragraph""" + @staticmethod + def default(caption_style: SccCaptionStyle = SccCaptionStyle.Unknown): + """Initializes a default caption paragraph""" + return SccCaptionParagraph(caption_style=caption_style) + def __init__(self, safe_area_x_offset: int = 0, safe_area_y_offset: int = 0, caption_style: SccCaptionStyle = SccCaptionStyle.Unknown): self._caption_id: str = "" @@ -70,6 +75,8 @@ def __init__(self, safe_area_x_offset: int = 0, safe_area_y_offset: int = 0, self._current_line: Optional[SccCaptionLine] = None # Lines per row in the active area (will be separated by line-breaks) self._caption_lines: Dict[int, SccCaptionLine] = {} + # Initialize first default line + self.new_caption_line() self._caption_style: SccCaptionStyle = caption_style self._style_properties = {} @@ -114,14 +121,12 @@ def get_caption_style(self) -> SccCaptionStyle: """Returns the caption style""" return self._caption_style - def get_current_line(self) -> Optional[SccCaptionLine]: + def get_current_line(self) -> SccCaptionLine: """Returns the current caption line""" return self._current_line - def get_current_text(self) -> Optional[SccCaptionText]: + def get_current_text(self) -> SccCaptionText: """Returns the current caption text""" - if self._current_line is None: - return None return self._current_line.get_current_text() def append_text(self, text: str): @@ -155,9 +160,14 @@ def get_style_property(self, style_property) -> Optional: def set_cursor_at(self, row: int, indent: Optional[int] = None): """Set cursor position and initialize a new line if necessary""" - # Remove current line if empty (useless) - if self._current_line is not None and self._current_line.is_empty(): - del self._caption_lines[self._current_line.get_row()] + if self._caption_lines.get(self._current_line.get_row()) is not None: + # Set current line if necessary + if self._caption_lines.get(self._current_line.get_row()) is not self._current_line: + self._current_line = self._caption_lines.get(self._current_line.get_row()) + + # Remove current line if empty (i.e. useless) + if self._current_line.is_empty(): + del self._caption_lines[self._current_line.get_row()] self._cursor = (row, indent if indent is not None else 0) @@ -198,7 +208,11 @@ def get_lines(self) -> Dict[int, SccCaptionLine]: def is_empty(self) -> bool: """Returns whether the paragraph has no content""" - return not self._caption_lines + return self._get_length() == 0 + + def _get_length(self) -> int: + """Returns the total length of contained text""" + return sum([line.get_length() for line in self._caption_lines.values()]) def copy_lines(self) -> Dict[int, SccCaptionLine]: """Copy paragraph lines (without time attributes)""" @@ -217,10 +231,6 @@ def copy_lines(self) -> Dict[int, SccCaptionLine]: def new_caption_text(self): """Appends a new caption text content, and keeps reference on it""" - if self._current_line is None: - LOGGER.warning("Add a new caption line to add new caption text") - self.new_caption_line() - self._current_line.add_text(SccCaptionText()) def new_caption_line(self): @@ -245,7 +255,7 @@ def roll_up(self): def get_origin(self) -> CoordinateType: """Computes and returns the current paragraph origin, based on its content""" - if len(self._caption_lines) > 0: + if not self.is_empty(): x_offsets = [text.get_indent() for text in self._caption_lines.values()] y_offsets = [text.get_row() - 1 for text in self._caption_lines.values()] @@ -255,7 +265,7 @@ def get_origin(self) -> CoordinateType: def get_extent(self) -> ExtentType: """Computes and returns the current paragraph extent, based on its content""" - if len(self._caption_lines) == 0: + if self.is_empty(): return get_extent_from_dimensions(0, 0) paragraph_rows = self._caption_lines.keys() diff --git a/src/main/python/ttconv/scc/caption_style.py b/src/main/python/ttconv/scc/caption_style.py index 9243188d..544073be 100644 --- a/src/main/python/ttconv/scc/caption_style.py +++ b/src/main/python/ttconv/scc/caption_style.py @@ -58,3 +58,8 @@ class SccCaptionStyle(Enum): # - EDM (to erase the displayed caption, optional) # - EOC (to display the current caption) PopOn = 3 + + @staticmethod + def default(): + """Returns the default caption style""" + return SccCaptionStyle.PopOn diff --git a/src/main/python/ttconv/scc/context.py b/src/main/python/ttconv/scc/context.py index d2db5237..c4572451 100644 --- a/src/main/python/ttconv/scc/context.py +++ b/src/main/python/ttconv/scc/context.py @@ -66,12 +66,12 @@ def __init__(self, safe_area_x_offset: int, safe_area_y_offset: int, config: Opt self.previous_word: Optional[SccWord] = None self.previous_word_type: Optional[Type] = None + # Caption style (Pop-on, Roll-up, Paint-on) currently processed + self.current_style = SccCaptionStyle.default() # Buffered caption being built self.buffered_caption = None # Captions being displayed self.active_caption: Optional[SccCaptionParagraph] = None - # Caption style (Pop-on, Roll-up, Paint-on) currently processed - self.current_style = SccCaptionStyle.Unknown # Roll-up caption number of lines self.roll_up_depth: int = 0 @@ -97,7 +97,7 @@ def new_active_caption(self, begin_time_code: SmpteTimeCode, caption_style: SccC def new_buffered_caption(self): """Resets buffered caption""" - self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset) + self.buffered_caption = SccCaptionParagraph(self.safe_area_x_offset, self.safe_area_y_offset, SccCaptionStyle.PopOn) def get_caption_to_process(self) -> Optional[SccCaptionParagraph]: """Returns the caption currently being processed""" @@ -204,9 +204,6 @@ def process_preamble_address_code(self, pac: SccPreambleAddressCode, time_code: self.active_caption.set_cursor_at(pac_row, pac_indent) - if self.active_caption.get_current_text() is None: - self.active_caption.new_caption_text() - elif self.current_style is SccCaptionStyle.RollUp: if not self.has_active_caption(): @@ -299,11 +296,11 @@ def process_attribute_code(self, attribute_code: SccAttributeCode): processed_caption = self.get_caption_to_process() - if processed_caption is None or processed_caption.get_current_text() is None: + if processed_caption is None: LOGGER.warning("No current SCC caption nor content initialized") return - if processed_caption.get_current_text() is not None and processed_caption.get_current_text().get_text(): + if processed_caption.get_current_text().get_text(): processed_caption.new_caption_text() if attribute_code.is_background(): @@ -321,11 +318,6 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim # Start a new Pop-On caption self.current_style = SccCaptionStyle.PopOn - if self.buffered_caption.get_caption_style() is SccCaptionStyle.Unknown: - self.buffered_caption.set_caption_style(self.current_style) - self.buffered_caption.new_caption_line() - self.buffered_caption.new_caption_text() - elif control_code is SccControlCode.RDC: # Start a new Paint-On caption self.current_style = SccCaptionStyle.PaintOn @@ -398,6 +390,11 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim # Roll the displayed caption up one row (Roll-Up) if self.has_active_caption(): + if self.active_caption.get_caption_style() is not SccCaptionStyle.RollUp: + LOGGER.warning("Cannot roll-up active %s-styled caption, erase it instead.", self.active_caption.get_caption_style().name) + self.push_active_caption_to_model(time_code) + return + if self.active_caption.get_current_text().is_empty(): self.count -= 1 previous_lines = [] @@ -432,6 +429,10 @@ def process_control_code(self, control_code: SccControlCode, time_code: SmpteTim def process_text(self, word: str, time_code: SmpteTimeCode): """Processes SCC text words""" if self.current_style is SccCaptionStyle.PaintOn: + if not self.has_active_caption(): + LOGGER.warning("Initialize active caption buffer to handle paint-on text at %s", time_code) + self.paint_on_active_caption(time_code) + if word.startswith(" "): if self.active_caption.get_caption_style() is not SccCaptionStyle.PaintOn: @@ -464,6 +465,10 @@ def process_text(self, word: str, time_code: SmpteTimeCode): self.active_caption.get_current_text().add_style_property(StyleProperties.TextDecoration, self.current_text_decoration) elif self.current_style is SccCaptionStyle.RollUp: + if not self.has_active_caption(): + LOGGER.warning("Initialize active caption buffer to handle roll-up text at %s", time_code) + self.new_active_caption(time_code, self.current_style) + self.active_caption.append_text(word) self.active_caption.get_current_text().add_style_property(StyleProperties.Color, self.current_color) diff --git a/src/main/python/ttconv/scc/disassembly.py b/src/main/python/ttconv/scc/disassembly.py index b4c788ad..d4bf88fa 100644 --- a/src/main/python/ttconv/scc/disassembly.py +++ b/src/main/python/ttconv/scc/disassembly.py @@ -26,10 +26,18 @@ """SCC disassembly functions""" import logging +from ttconv.scc.codes.attribute_codes import SccAttributeCode +from ttconv.scc.codes.control_codes import SccControlCode +from ttconv.scc.codes.extended_characters import SccExtendedCharacter +from ttconv.scc.codes.mid_row_codes import SccMidRowCode +from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode +from ttconv.scc.codes.special_characters import SccSpecialCharacter +from ttconv.scc.word import SccWord from ttconv.style_properties import ColorType, NamedColors, FontStyleType, TextDecorationType LOGGER = logging.getLogger(__name__) + def get_color_disassembly(color: ColorType) -> str: """Get color disassembly code""" disassembly = "" @@ -81,3 +89,63 @@ def get_text_decoration_disassembly(text_decoration: TextDecorationType) -> str: if text_decoration is not None and text_decoration.underline is True: return "U" return "" + + +def get_scc_word_disassembly(scc_word: SccWord) -> str: + """Returns the disassembly code for specified SCC word""" + if scc_word.value == 0x0000: + return "{}" + + if scc_word.byte_1 < 0x20: + + attribute_code = SccAttributeCode.find(scc_word.value) + control_code = SccControlCode.find(scc_word.value) + mid_row_code = SccMidRowCode.find(scc_word.value) + pac = SccPreambleAddressCode.find(scc_word.byte_1, scc_word.byte_2) + spec_char = SccSpecialCharacter.find(scc_word.value) + extended_char = SccExtendedCharacter.find(scc_word.value) + + if pac is not None: + disassembly_code = f"{{{pac.get_row():02}" + color = pac.get_color() + indent = pac.get_indent() + if indent is not None and indent > 0: + disassembly_code += f"{indent :02}" + elif color is not None: + disassembly_code += get_color_disassembly(color) + disassembly_code += get_font_style_disassembly(pac.get_font_style()) + disassembly_code += get_text_decoration_disassembly(pac.get_text_decoration()) + else: + disassembly_code += "00" + disassembly_code += "}" + return disassembly_code + + if attribute_code is not None: + disassembly_code = "{" + disassembly_code += "B" if attribute_code.is_background() else "" + disassembly_code += get_color_disassembly(attribute_code.get_color()) + disassembly_code += get_text_decoration_disassembly(attribute_code.get_text_decoration()) + disassembly_code += "}" + return disassembly_code + + if mid_row_code is not None: + disassembly_code = "{" + disassembly_code += get_color_disassembly(mid_row_code.get_color()) + disassembly_code += get_font_style_disassembly(mid_row_code.get_font_style()) + disassembly_code += get_text_decoration_disassembly(mid_row_code.get_text_decoration()) + disassembly_code += "}" + return disassembly_code + + if control_code is not None: + return "{" + control_code.get_name() + "}" + + if spec_char is not None: + return spec_char.get_unicode_value() + + if extended_char is not None: + return extended_char.get_unicode_value() + + LOGGER.warning("Unsupported SCC word: %s", hex(scc_word.value)) + return "{??}" + + return scc_word.to_text() diff --git a/src/main/python/ttconv/scc/line.py b/src/main/python/ttconv/scc/line.py index 0fc6f933..b86d4261 100644 --- a/src/main/python/ttconv/scc/line.py +++ b/src/main/python/ttconv/scc/line.py @@ -39,7 +39,7 @@ from ttconv.scc.codes.preambles_address_codes import SccPreambleAddressCode from ttconv.scc.codes.special_characters import SccSpecialCharacter from ttconv.scc.context import SccContext -from ttconv.scc.disassembly import get_color_disassembly, get_font_style_disassembly, get_text_decoration_disassembly +from ttconv.scc.disassembly import get_scc_word_disassembly from ttconv.scc.word import SccWord from ttconv.time_code import SmpteTimeCode, FPS_30 @@ -99,63 +99,7 @@ def to_disassembly(self) -> str: disassembly_line = str(self.time_code) + "\t" for scc_word in self.scc_words: - - if scc_word.value == 0x0000: - disassembly_line += "{}" - continue - - if scc_word.byte_1 < 0x20: - - attribute_code = SccAttributeCode.find(scc_word.value) - control_code = SccControlCode.find(scc_word.value) - mid_row_code = SccMidRowCode.find(scc_word.value) - pac = SccPreambleAddressCode.find(scc_word.byte_1, scc_word.byte_2) - spec_char = SccSpecialCharacter.find(scc_word.value) - extended_char = SccExtendedCharacter.find(scc_word.value) - - if pac is not None: - disassembly_line += f"{{{pac.get_row():02}" - color = pac.get_color() - indent = pac.get_indent() - if indent is not None and indent > 0: - disassembly_line += f"{indent :02}" - elif color is not None: - disassembly_line += get_color_disassembly(color) - disassembly_line += get_font_style_disassembly(pac.get_font_style()) - disassembly_line += get_text_decoration_disassembly(pac.get_text_decoration()) - else: - disassembly_line += "00" - disassembly_line += "}" - - elif attribute_code is not None: - disassembly_line += "{" - disassembly_line += "B" if attribute_code.is_background() else "" - disassembly_line += get_color_disassembly(attribute_code.get_color()) - disassembly_line += get_text_decoration_disassembly(attribute_code.get_text_decoration()) - disassembly_line += "}" - - elif mid_row_code is not None: - disassembly_line += "{" - disassembly_line += get_color_disassembly(mid_row_code.get_color()) - disassembly_line += get_font_style_disassembly(mid_row_code.get_font_style()) - disassembly_line += get_text_decoration_disassembly(mid_row_code.get_text_decoration()) - disassembly_line += "}" - - elif control_code is not None: - disassembly_line += "{" + control_code.get_name() + "}" - - elif spec_char is not None: - disassembly_line += spec_char.get_unicode_value() - - elif extended_char is not None: - disassembly_line += extended_char.get_unicode_value() - - else: - disassembly_line += "{??}" - LOGGER.warning("Unsupported SCC word: %s", hex(scc_word.value)) - - else: - disassembly_line += scc_word.to_text() + disassembly_line += get_scc_word_disassembly(scc_word) return disassembly_line diff --git a/src/test/python/test_scc_content.py b/src/test/python/test_scc_content.py index 2de0cd93..1e099fb9 100644 --- a/src/test/python/test_scc_content.py +++ b/src/test/python/test_scc_content.py @@ -50,11 +50,11 @@ def test_line(self): self.assertEqual(7, caption_line.get_row()) self.assertEqual(4, caption_line.get_indent()) - self.assertIsNone(caption_line.get_current_text()) + self.assertTrue(caption_line.is_empty()) self.assertEqual(0, caption_line.get_cursor()) self.assertEqual(0, caption_line.get_length()) - self.assertTrue(caption_line.is_empty()) - self.assertListEqual([], caption_line.get_texts()) + self.assertEqual(1, len(caption_line.get_texts())) + self.assertTrue(caption_line.get_current_text().is_empty()) caption_line.set_cursor(10) self.assertEqual(0, caption_line.get_cursor()) @@ -134,7 +134,7 @@ def test_line(self): caption_line.clear() self.assertEqual(0, caption_line.get_cursor()) self.assertEqual(0, caption_line.get_length()) - self.assertListEqual([], caption_line.get_texts()) + self.assertTrue(caption_line.is_empty()) def test_line_text_leading_and_trailing_spaces(self): line = SccCaptionLine(0, 0) diff --git a/src/test/python/test_scc_paragraph.py b/src/test/python/test_scc_paragraph.py index f3e51f3c..ae489fdd 100644 --- a/src/test/python/test_scc_paragraph.py +++ b/src/test/python/test_scc_paragraph.py @@ -49,12 +49,8 @@ def test_content(self): self.assertEqual(4, caption_paragraph._safe_area_x_offset) self.assertEqual(2, caption_paragraph._safe_area_y_offset) - self.assertIsNone(caption_paragraph.get_current_text()) - self.assertEqual(0, len(caption_paragraph._caption_lines)) - - caption_paragraph.new_caption_text() self.assertEqual(caption_paragraph.get_current_line(), caption_paragraph.get_lines()[0]) - self.assertIsNotNone(caption_paragraph.get_current_text()) + self.assertTrue(caption_paragraph.get_current_text().is_empty()) self.assertEqual(1, len(caption_paragraph._caption_lines)) caption_paragraph.set_cursor_at(4, 4) diff --git a/src/test/python/test_scc_reader.py b/src/test/python/test_scc_reader.py index 6563b905..76f647e7 100644 --- a/src/test/python/test_scc_reader.py +++ b/src/test/python/test_scc_reader.py @@ -1016,7 +1016,7 @@ def test_scc_mid_row_erase_displayed_memory_control_code(self): self.check_caption(p_list[5], "caption6", "00:00:03:19", None, "sagittis.") self.assertEqual(region_4, p_list[5].get_region()) - def test_scc_content_parsing_from_text(self): + def test_scc_content_starting_with_text(self): scc_content = """\ Scenarist_SCC V1.0 @@ -1040,11 +1040,18 @@ def test_scc_content_parsing_from_text(self): region_1 = doc.get_region("pop1") self.assertIsNotNone(region_1) - self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) - self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_region_origin(region_1, 4, 1, doc.get_cell_resolution()) + self.check_region_extent(region_1, 5, 16, doc.get_cell_resolution()) self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + region_2 = doc.get_region("pop2") + self.assertIsNotNone(region_2) + self.check_region_origin(region_2, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_2, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_2, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_2, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + body = doc.get_body() self.assertIsNotNone(body) @@ -1054,13 +1061,16 @@ def test_scc_content_parsing_from_text(self): self.assertIsNotNone(div) p_list = list(div) - self.assertEqual(1, len(p_list)) + self.assertEqual(2, len(p_list)) - self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, - "consectetur adipiscing elit.") + self.check_caption(p_list[0], "caption1", "00:00:03:06", "00:00:08:26", "ipsum") self.assertEqual(region_1, p_list[0].get_region()) - def test_scc_content_parsing_from_mid_row_code(self): + self.check_caption(p_list[1], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_2, p_list[1].get_region()) + + def test_scc_content_starting_with_mid_row_code(self): scc_content = """\ Scenarist_SCC V1.0 @@ -1084,11 +1094,18 @@ def test_scc_content_parsing_from_mid_row_code(self): region_1 = doc.get_region("pop1") self.assertIsNotNone(region_1) - self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) - self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_region_origin(region_1, 4, 1, doc.get_cell_resolution()) + self.check_region_extent(region_1, 6, 16, doc.get_cell_resolution()) self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + region_2 = doc.get_region("pop2") + self.assertIsNotNone(region_2) + self.check_region_origin(region_2, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_2, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_2, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_2, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + body = doc.get_body() self.assertIsNotNone(body) @@ -1098,13 +1115,16 @@ def test_scc_content_parsing_from_mid_row_code(self): self.assertIsNotNone(div) p_list = list(div) - self.assertEqual(1, len(p_list)) + self.assertEqual(2, len(p_list)) - self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, - "consectetur adipiscing elit.") + self.check_caption(p_list[0], "caption1", "00:00:03:07", "00:00:08:26", " ipsum") self.assertEqual(region_1, p_list[0].get_region()) - def test_scc_content_parsing_from_control_code(self): + self.check_caption(p_list[1], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_2, p_list[1].get_region()) + + def test_scc_content_starting_with_control_code(self): scc_content = """\ Scenarist_SCC V1.0 @@ -1128,11 +1148,18 @@ def test_scc_content_parsing_from_control_code(self): region_1 = doc.get_region("pop1") self.assertIsNotNone(region_1) - self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) - self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_region_origin(region_1, 4, 1, doc.get_cell_resolution()) + self.check_region_extent(region_1, 5, 16, doc.get_cell_resolution()) self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + region_2 = doc.get_region("pop2") + self.assertIsNotNone(region_2) + self.check_region_origin(region_2, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_2, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_2, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_2, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + body = doc.get_body() self.assertIsNotNone(body) @@ -1142,13 +1169,16 @@ def test_scc_content_parsing_from_control_code(self): self.assertIsNotNone(div) p_list = list(div) - self.assertEqual(1, len(p_list)) + self.assertEqual(2, len(p_list)) - self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, - "consectetur adipiscing elit.") + self.check_caption(p_list[0], "caption1", "00:00:03:07", "00:00:08:26", "ipsum") self.assertEqual(region_1, p_list[0].get_region()) - def test_scc_content_parsing_from_preamble_address_code(self): + self.check_caption(p_list[1], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_2, p_list[1].get_region()) + + def test_scc_content_starting_with_preamble_address_code(self): scc_content = """\ Scenarist_SCC V1.0 @@ -1172,11 +1202,18 @@ def test_scc_content_parsing_from_preamble_address_code(self): region_1 = doc.get_region("pop1") self.assertIsNotNone(region_1) - self.check_region_origin(region_1, 4, 15, doc.get_cell_resolution()) - self.check_region_extent(region_1, 28, 2, doc.get_cell_resolution()) + self.check_region_origin(region_1, 4, 14, doc.get_cell_resolution()) + self.check_region_extent(region_1, 5, 3, doc.get_cell_resolution()) self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + region_2 = doc.get_region("pop2") + self.assertIsNotNone(region_2) + self.check_region_origin(region_2, 4, 15, doc.get_cell_resolution()) + self.check_region_extent(region_2, 28, 2, doc.get_cell_resolution()) + self.check_element_style(region_2, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_2, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + body = doc.get_body() self.assertIsNotNone(body) @@ -1186,12 +1223,15 @@ def test_scc_content_parsing_from_preamble_address_code(self): self.assertIsNotNone(div) p_list = list(div) - self.assertEqual(1, len(p_list)) + self.assertEqual(2, len(p_list)) - self.check_caption(p_list[0], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, - "consectetur adipiscing elit.") + self.check_caption(p_list[0], "caption1", "00:00:03:07", "00:00:08:26", "ipsum") self.assertEqual(region_1, p_list[0].get_region()) + self.check_caption(p_list[1], "caption2", "00:00:08:26", "00:00:09:09", "dolor sit amet,", Br, + "consectetur adipiscing elit.") + self.assertEqual(region_2, p_list[1].get_region()) + def test_scc_with_negative_cursor(self): scc_content = """Scenarist_SCC V1.0 00:00:01:00 94AE 94AE 9420 9420 94F8 94F8 45E5 E5E3 68A1 94F4 94F4 D3E3 61F2 79A1 942C 942C 942F 942F @@ -1229,6 +1269,227 @@ def test_scc_with_negative_cursor(self): self.check_caption(p_list[0], "caption1", "00:00:01:12", "00:00:02:01", "Scary! Eeech!") self.assertEqual(region_1, p_list[0].get_region()) + def test_scc_content_starting_with_backspace(self): + scc_content = """Scenarist_SCC V1.0 +10:01:44;17 94AE 9420 9470 9723 946E 80C1 92B0 20ec 6120 e6e9 6e20 64e5 7320 616e 6edc e573 2031 38b0 b02c 942C 8080 8080 942F +""" + + expected_scc_disassembly = """\ +10:01:44;17 {ENM}{RCL}{1500}{TO3}{15WhI}{??}À la fin des années 1800,{EDM}{}{}{EOC} +""" + + scc_disassembly = to_disassembly(scc_content) + self.assertEqual(expected_scc_disassembly, scc_disassembly) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 16, doc.get_cell_resolution()) + self.check_region_extent(region_1, 25, 1, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption1", "10:01:45;10", None, "À la fin des années 1800,") + self.assertEqual(region_1, p_list[0].get_region()) + + def test_scc_content_roll_up_empty_caption(self): + scc_content = """Scenarist_SCC V1.0 +10:03:20:16 94ad 94ad 9426 9426 92d0 92d0 a880 9138 9138 2064 942c 942c e575 f820 76ef e9f8 2c20 e56e 2061 6e67 ec61 e973 29ba +""" + expected_scc_disassembly = """\ +10:03:20:16 {CR}{CR}{RU3}{RU3}{0300}{0300}(àà d{EDM}{EDM}eux voix, en anglais): +""" + + scc_disassembly = to_disassembly(scc_content) + self.assertEqual(expected_scc_disassembly, scc_disassembly) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + region_1 = doc.get_region("rollup1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 2, doc.get_cell_resolution()) + self.check_region_extent(region_1, 4, 15, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.after) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + region_2 = doc.get_region("rollup2") + self.assertIsNotNone(region_2) + self.check_region_origin(region_2, 4, 2, doc.get_cell_resolution()) + self.check_region_extent(region_2, 22, 15, doc.get_cell_resolution()) + self.check_element_style(region_2, StyleProperties.DisplayAlign, DisplayAlignType.after) + self.check_element_style(region_2, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(2, len(p_list)) + + self.check_caption(p_list[0], "caption1", "10:03:20:18", "10:03:20:24", "(à d") + self.assertEqual(region_1, p_list[0].get_region()) + + self.check_caption(p_list[1], "caption2", "10:03:20:24", None, "eux voix, en anglais):") + self.assertEqual(region_2, p_list[1].get_region()) + + def test_scc_text_without_style_nor_position(self): + scc_content = """Scenarist_SCC V1.0 +10:55:31:29 2080 3280 2046 3180 +""" + expected_scc_disassembly = """\ +10:55:31:29 2 F1 +""" + + scc_disassembly = to_disassembly(scc_content) + self.assertEqual(expected_scc_disassembly, scc_disassembly) + + doc = to_model(scc_content) + self.assertIsNotNone(doc) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + self.assertEqual(0, len(list(div))) + + def test_scc_content_with_paragraph_of_spaces(self): + scc_content = """\ +Scenarist_SCC V1.0 + +00:00:03:01 9370 6970 7375 6d00 942c 942f + +09:59:59:18 9420 9476 9723 2080 9420 942c 942f 9420 97f4 a8e3 616d e5f2 6120 7368 75f4 f4e5 f229 9452 9723 91ae bce3 6861 eff4 e9e3 20e3 61e3 ef70 68ef 6e79 9470 9723 91ae e5f8 e973 f473 20f4 68f2 ef75 6768 ef75 f420 70f2 ef67 f261 6d3e 9420 942c 942f 9420 94f4 a870 ec61 79e6 75ec 206d 7573 e9e3 2980 + +11:00:09:07 942c +""" + + expected_disassembly = """\ +00:00:03:01 {1300}ipsum{EDM}{EOC} +09:59:59:18 {RCL}{1512}{TO3} {RCL}{EDM}{EOC}{RCL}{1008}(camera shutter){1404}{TO3}{I}{RCL}{EDM}{EOC}{RCL}{1508}(playful music) +11:00:09:07 {EDM} +""" + + disassembly = to_disassembly(scc_content) + self.assertEqual(expected_disassembly, disassembly) + + doc = to_model(scc_content) + + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 14, doc.get_cell_resolution()) + self.check_region_extent(region_1, 5, 3, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + region_2 = doc.get_region("pop2") + self.assertIsNotNone(region_2) + self.check_region_origin(region_2, 19, 16, doc.get_cell_resolution()) + self.check_region_extent(region_2, 1, 1, doc.get_cell_resolution()) + self.check_element_style(region_2, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_2, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + region_3 = doc.get_region("pop3") + self.assertIsNotNone(region_3) + self.check_region_origin(region_3, 7, 11, doc.get_cell_resolution()) + self.check_region_extent(region_3, 27, 6, doc.get_cell_resolution()) + self.check_element_style(region_3, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_3, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(3, len(p_list)) + + self.check_caption(p_list[0], "caption1", "00:00:03:07", "09:59:59:25", "ipsum") + self.assertEqual(region_1, p_list[0].get_region()) + + self.check_caption(p_list[1], "caption2", "09:59:59:25", "10:00:01:06", " ") + self.assertEqual(region_2, p_list[1].get_region()) + + self.check_caption(p_list[2], "caption3", "10:00:01:06", "11:00:09:09", "(camera shutter)", Br, Br, Br, Br, + " ") + self.assertEqual(region_3, p_list[2].get_region()) + + def test_scc_content_trying_to_roll_up_pop_on_paragraph(self): + scc_content = """\ +Scenarist_SCC V1.0 + +11:19:24:05 9420 946E A861 7070 EC61 7564 E973 73E5 6DE5 6EF4 7329 9420 942C 942F + +11:19:30:17 9426 94AD 946E 3E3E 3E20 CE61 F2F2 61F4 E575 F2BA 2043 E5F4 F4E5 +""" + expected_disassembly = """\ +11:19:24:05 {RCL}{15WhI}(applaudissements){RCL}{EDM}{EOC} +11:19:30:17 {RU3}{CR}{15WhI}>>> Narrateur: Cette +""" + + disassembly = to_disassembly(scc_content) + self.assertEqual(expected_disassembly, disassembly) + + doc = to_model(scc_content) + + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 4, 16, doc.get_cell_resolution()) + self.check_region_extent(region_1, 18, 1, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + region_2 = doc.get_region("rollup2") + self.assertIsNotNone(region_2) + self.check_region_origin(region_2, 4, 2, doc.get_cell_resolution()) + self.check_region_extent(region_2, 20, 15, doc.get_cell_resolution()) + self.check_element_style(region_2, StyleProperties.DisplayAlign, DisplayAlignType.after) + self.check_element_style(region_2, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(2, len(p_list)) + + self.check_caption(p_list[0], "caption1", "11:19:24:19", "11:19:30:19", "(applaudissements)") + self.assertEqual(region_1, p_list[0].get_region()) + + self.check_caption(p_list[1], "caption2", "11:19:30:20", None, ">>> Narrateur: Cette") + self.assertEqual(region_2, p_list[1].get_region()) + if __name__ == '__main__': unittest.main() From 6f98d790ffb4ca9506242a38dfece493262c720a Mon Sep 17 00:00:00 2001 From: Valentin NOEL Date: Wed, 8 Nov 2023 11:08:24 +0100 Subject: [PATCH 13/13] SCC: reader skips Caption Channel 2 content --- src/main/python/ttconv/scc/codes/__init__.py | 20 +++++++- .../ttconv/scc/codes/attribute_codes.py | 2 +- .../python/ttconv/scc/codes/control_codes.py | 2 +- .../python/ttconv/scc/codes/mid_row_codes.py | 2 +- .../scc/codes/preambles_address_codes.py | 9 ++-- src/main/python/ttconv/scc/context.py | 4 ++ src/main/python/ttconv/scc/disassembly.py | 46 +++++++++++++++---- src/main/python/ttconv/scc/line.py | 18 +++++++- src/main/python/ttconv/scc/reader.py | 6 +-- src/main/python/ttconv/scc/word.py | 33 +++++++++---- src/test/python/test_scc_pacs.py | 2 +- src/test/python/test_scc_reader.py | 36 +++++++++++++++ 12 files changed, 148 insertions(+), 32 deletions(-) diff --git a/src/main/python/ttconv/scc/codes/__init__.py b/src/main/python/ttconv/scc/codes/__init__.py index 59d4ecbd..7c9ecebb 100644 --- a/src/main/python/ttconv/scc/codes/__init__.py +++ b/src/main/python/ttconv/scc/codes/__init__.py @@ -26,7 +26,7 @@ """SCC Codes""" from enum import Enum -from typing import Tuple +from typing import Tuple, Optional from ttconv.style_properties import NamedColors SCC_COLOR_MAPPING = { @@ -46,6 +46,14 @@ 0x0D: NamedColors.magenta.value } +class SccChannel(Enum): + """SCC Caption Channel""" + CHANNEL_1 = 1 + CHANNEL_2 = 2 + + def __str__(self): + return "CC" + str(self.value) + class SccCode(Enum): """SCC codes base definition class""" @@ -61,6 +69,16 @@ def get_values(self) -> Tuple[int, int]: """Returns SCC Code values""" return self._channel_1, self._channel_2 + def get_channel(self, value: int) -> Optional[SccChannel]: + """Returns caption channel corresponding to the specified code value""" + if value == self._channel_1: + return SccChannel.CHANNEL_1 + + if value == self._channel_2: + return SccChannel.CHANNEL_2 + + return None + def contains_value(self, value: int) -> bool: """Returns whether the specified value is contained into the SCC code channels values""" return value in self.get_values() diff --git a/src/main/python/ttconv/scc/codes/attribute_codes.py b/src/main/python/ttconv/scc/codes/attribute_codes.py index 738082d3..a378a5f0 100644 --- a/src/main/python/ttconv/scc/codes/attribute_codes.py +++ b/src/main/python/ttconv/scc/codes/attribute_codes.py @@ -88,4 +88,4 @@ def find(value: int) -> Optional[SccAttributeCode]: def debug(self, value: int) -> str: """Debug representation of the code""" - return "[ATC|" + self.get_name() + "/" + hex(value) + "]" + return "[" + str(self.get_channel(value)) + "|ATC|" + self.get_name() + "/" + hex(value) + "]" diff --git a/src/main/python/ttconv/scc/codes/control_codes.py b/src/main/python/ttconv/scc/codes/control_codes.py index ebdc93de..b747439a 100644 --- a/src/main/python/ttconv/scc/codes/control_codes.py +++ b/src/main/python/ttconv/scc/codes/control_codes.py @@ -77,4 +77,4 @@ def find(value: int) -> typing.Optional[SccControlCode]: def debug(self, value: int) -> str: """Debug representation of the code""" - return "[CC|" + self.get_name() + "/" + hex(value) + "]" + return "[" + str(self.get_channel(value)) + "|CC|" + self.get_name() + "/" + hex(value) + "]" diff --git a/src/main/python/ttconv/scc/codes/mid_row_codes.py b/src/main/python/ttconv/scc/codes/mid_row_codes.py index 2cc0f936..ce20e310 100644 --- a/src/main/python/ttconv/scc/codes/mid_row_codes.py +++ b/src/main/python/ttconv/scc/codes/mid_row_codes.py @@ -90,4 +90,4 @@ def find(value: int) -> typing.Optional[SccMidRowCode]: def debug(self, value: int) -> str: """Debug representation of the code""" - return "[MRC|" + self.get_name() + "/" + hex(value) + "]" + return "[" + str(self.get_channel(value)) + "|MRC|" + self.get_name() + "/" + hex(value) + "]" diff --git a/src/main/python/ttconv/scc/codes/preambles_address_codes.py b/src/main/python/ttconv/scc/codes/preambles_address_codes.py index c530322b..7ce51728 100644 --- a/src/main/python/ttconv/scc/codes/preambles_address_codes.py +++ b/src/main/python/ttconv/scc/codes/preambles_address_codes.py @@ -29,7 +29,7 @@ from typing import Optional -from ttconv.scc.codes import SCC_COLOR_MAPPING +from ttconv.scc.codes import SCC_COLOR_MAPPING, SccChannel from ttconv.style_properties import NamedColors, TextDecorationType, \ FontStyleType, ColorType @@ -103,7 +103,7 @@ def __init__(self, byte_1: int, byte_2: int): self._font_style: Optional[bool] = FontStyleType.italic if desc_bits.get_italic() else None self._text_decoration: Optional[TextDecorationType] = \ TextDecorationType(underline=True) if desc_bits.get_underline() else None - self._channel = 2 if byte_1 & 0x08 else 1 + self._channel = SccChannel.CHANNEL_2 if byte_1 & 0x08 else SccChannel.CHANNEL_1 def get_row(self) -> int: """Returns the PAC row""" @@ -125,7 +125,7 @@ def get_text_decoration(self) -> Optional[TextDecorationType]: """Returns PAC text decoration""" return self._text_decoration - def get_channel(self): + def get_channel(self) -> SccChannel: """Returns PAC channel""" return self._channel @@ -165,7 +165,8 @@ def _get_description_bits(byte_2: int) -> Optional[_SccPacDescriptionBits]: def debug(self, value: int) -> str: """Debug representation of the code""" - debug = "[PAC|" + str(self.get_row()) + "|" + str(self.get_indent()) + debug = "[" + str(self.get_channel()) + "|" + debug += "PAC|" + str(self.get_row()) + "|" + str(self.get_indent()) if self.get_color() is not None: debug += "|" + str(self.get_color()) if self.get_font_style() is not None: diff --git a/src/main/python/ttconv/scc/context.py b/src/main/python/ttconv/scc/context.py index c4572451..1978ebad 100644 --- a/src/main/python/ttconv/scc/context.py +++ b/src/main/python/ttconv/scc/context.py @@ -34,6 +34,7 @@ from ttconv.model import Div from ttconv.scc.caption_paragraph import SccCaptionParagraph from ttconv.scc.caption_style import SccCaptionStyle +from ttconv.scc.codes import SccChannel from ttconv.scc.codes.attribute_codes import SccAttributeCode from ttconv.scc.codes.control_codes import SccControlCode from ttconv.scc.codes.mid_row_codes import SccMidRowCode @@ -73,6 +74,9 @@ def __init__(self, safe_area_x_offset: int, safe_area_y_offset: int, config: Opt # Captions being displayed self.active_caption: Optional[SccCaptionParagraph] = None + # Current caption channel (default is CC1) + self.current_channel = SccChannel.CHANNEL_1 + # Roll-up caption number of lines self.roll_up_depth: int = 0 diff --git a/src/main/python/ttconv/scc/disassembly.py b/src/main/python/ttconv/scc/disassembly.py index d4bf88fa..44cd1802 100644 --- a/src/main/python/ttconv/scc/disassembly.py +++ b/src/main/python/ttconv/scc/disassembly.py @@ -91,22 +91,20 @@ def get_text_decoration_disassembly(text_decoration: TextDecorationType) -> str: return "" -def get_scc_word_disassembly(scc_word: SccWord) -> str: +def get_scc_word_disassembly(scc_word: SccWord, show_channel = False) -> str: """Returns the disassembly code for specified SCC word""" if scc_word.value == 0x0000: return "{}" if scc_word.byte_1 < 0x20: - attribute_code = SccAttributeCode.find(scc_word.value) - control_code = SccControlCode.find(scc_word.value) - mid_row_code = SccMidRowCode.find(scc_word.value) pac = SccPreambleAddressCode.find(scc_word.byte_1, scc_word.byte_2) - spec_char = SccSpecialCharacter.find(scc_word.value) - extended_char = SccExtendedCharacter.find(scc_word.value) if pac is not None: - disassembly_code = f"{{{pac.get_row():02}" + disassembly_code = "{" + if show_channel: + disassembly_code += str(pac.get_channel()) + "|" + disassembly_code += f"{pac.get_row():02}" color = pac.get_color() indent = pac.get_indent() if indent is not None and indent > 0: @@ -120,29 +118,61 @@ def get_scc_word_disassembly(scc_word: SccWord) -> str: disassembly_code += "}" return disassembly_code + attribute_code = SccAttributeCode.find(scc_word.value) + if attribute_code is not None: disassembly_code = "{" + if show_channel: + disassembly_code += str(attribute_code.get_channel(scc_word.value)) + "|" disassembly_code += "B" if attribute_code.is_background() else "" disassembly_code += get_color_disassembly(attribute_code.get_color()) disassembly_code += get_text_decoration_disassembly(attribute_code.get_text_decoration()) disassembly_code += "}" return disassembly_code + mid_row_code = SccMidRowCode.find(scc_word.value) + if mid_row_code is not None: disassembly_code = "{" + if show_channel: + disassembly_code += str(mid_row_code.get_channel(scc_word.value)) + "|" disassembly_code += get_color_disassembly(mid_row_code.get_color()) disassembly_code += get_font_style_disassembly(mid_row_code.get_font_style()) disassembly_code += get_text_decoration_disassembly(mid_row_code.get_text_decoration()) disassembly_code += "}" return disassembly_code + control_code = SccControlCode.find(scc_word.value) + if control_code is not None: - return "{" + control_code.get_name() + "}" + disassembly_code = "{" + if show_channel: + disassembly_code += str(control_code.get_channel(scc_word.value)) + "|" + disassembly_code += control_code.get_name() + disassembly_code += "}" + return disassembly_code + + # print(f"{hex(scc_word.byte_1)}{hex(scc_word.byte_2)}") + spec_char = SccSpecialCharacter.find(scc_word.value) if spec_char is not None: + if show_channel: + disassembly_code = "[" + disassembly_code += str(spec_char.get_channel(scc_word.value)) + disassembly_code += "]" + disassembly_code += spec_char.get_unicode_value() + return disassembly_code return spec_char.get_unicode_value() + extended_char = SccExtendedCharacter.find(scc_word.value) + if extended_char is not None: + if show_channel: + disassembly_code = "[" + disassembly_code += str(extended_char.get_channel(scc_word.value)) + disassembly_code += "]" + disassembly_code += extended_char.get_unicode_value() + return disassembly_code return extended_char.get_unicode_value() LOGGER.warning("Unsupported SCC word: %s", hex(scc_word.value)) diff --git a/src/main/python/ttconv/scc/line.py b/src/main/python/ttconv/scc/line.py index b86d4261..fdeb93f3 100644 --- a/src/main/python/ttconv/scc/line.py +++ b/src/main/python/ttconv/scc/line.py @@ -32,6 +32,7 @@ from typing import List, Optional from ttconv.scc.caption_style import SccCaptionStyle +from ttconv.scc.codes import SccChannel from ttconv.scc.codes.attribute_codes import SccAttributeCode from ttconv.scc.codes.control_codes import SccControlCode from ttconv.scc.codes.extended_characters import SccExtendedCharacter @@ -94,12 +95,12 @@ def get_style(self) -> SccCaptionStyle: return SccCaptionStyle.Unknown - def to_disassembly(self) -> str: + def to_disassembly(self, show_channels = False) -> str: """Converts SCC line into the disassembly format""" disassembly_line = str(self.time_code) + "\t" for scc_word in self.scc_words: - disassembly_line += get_scc_word_disassembly(scc_word) + disassembly_line += get_scc_word_disassembly(scc_word, show_channels) return disassembly_line @@ -122,6 +123,15 @@ def process(self, context: SccContext) -> SmpteTimeCode: if scc_word.byte_1 < 0x20: scc_code = scc_word.get_code() + caption_channel = scc_word.get_channel() + + if caption_channel is not SccChannel.CHANNEL_1: + if context.current_channel is not caption_channel: + LOGGER.warning("Skip Caption Channel 2 content") + context.current_channel = caption_channel + continue + + context.current_channel = caption_channel if isinstance(scc_code, SccPreambleAddressCode): debug += scc_code.debug(scc_word.value) @@ -163,6 +173,10 @@ def process(self, context: SccContext) -> SmpteTimeCode: context.previous_word_type = None else: + if context.current_channel is not SccChannel.CHANNEL_1: + # LOGGER.warning("Skip Caption Channel 2 code") + continue + text = scc_word.to_text() debug += text context.process_text(text, self.time_code) diff --git a/src/main/python/ttconv/scc/reader.py b/src/main/python/ttconv/scc/reader.py index 03873d07..fa4fe855 100644 --- a/src/main/python/ttconv/scc/reader.py +++ b/src/main/python/ttconv/scc/reader.py @@ -104,8 +104,8 @@ def to_model(scc_content: str, config: Optional[SccReaderConfiguration] = None, return document -def to_disassembly(scc_content: str) -> str: - """Dumps a SCC document into the disassembly format""" +def to_disassembly(scc_content: str, show_channels = False) -> str: + """Dumps an SCC document into the disassembly format""" disassembly = "" for line in scc_content.splitlines(): LOGGER.debug(line) @@ -114,7 +114,7 @@ def to_disassembly(scc_content: str) -> str: if scc_line is None: continue - line_to_disassembly = scc_line.to_disassembly() + line_to_disassembly = scc_line.to_disassembly(show_channels) LOGGER.debug(line_to_disassembly) disassembly += line_to_disassembly + "\n" diff --git a/src/main/python/ttconv/scc/word.py b/src/main/python/ttconv/scc/word.py index e2caf28c..e7bc8ae8 100644 --- a/src/main/python/ttconv/scc/word.py +++ b/src/main/python/ttconv/scc/word.py @@ -29,7 +29,7 @@ from typing import Optional -from ttconv.scc.codes import SccCode +from ttconv.scc.codes import SccCode, SccChannel from ttconv.scc.codes.attribute_codes import SccAttributeCode from ttconv.scc.codes.control_codes import SccControlCode from ttconv.scc.codes.extended_characters import SccExtendedCharacter @@ -48,6 +48,7 @@ def __init__(self, byte_1: int, byte_2: int): self.byte_1 = byte_1 self.byte_2 = byte_2 self.value = byte_1 * 0x100 + byte_2 + self.code: Optional[SccCode | SccPreambleAddressCode] = self._find_code() @staticmethod def _is_hex_word(word: str) -> bool: @@ -92,22 +93,34 @@ def from_str(hex_word: str) -> SccWord: data = bytes.fromhex(hex_word) return SccWord.from_bytes(data[0], data[1]) + def _find_code(self) -> Optional[SccCode | SccPreambleAddressCode]: + """Find corresponding code""" + if self.is_code(): + return SccControlCode.find(self.value) or \ + SccAttributeCode.find(self.value) or \ + SccMidRowCode.find(self.value) or \ + SccPreambleAddressCode.find(self.byte_1, self.byte_2) or \ + SccSpecialCharacter.find(self.value) or \ + SccExtendedCharacter.find(self.value) + return None + def to_text(self) -> str: """Converts SCC word to text""" return ''.join(SCC_STANDARD_CHARACTERS_MAPPING.get(byte, chr(byte)) for byte in [self.byte_1, self.byte_2] if byte != 0x00) def get_code(self) -> Optional[SccCode]: - """Find corresponding code""" - if self.is_code(): - return SccControlCode.find(self.value) or \ - SccAttributeCode.find(self.value) or \ - SccMidRowCode.find(self.value) or \ - SccPreambleAddressCode.find(self.byte_1, self.byte_2) or \ - SccSpecialCharacter.find(self.value) or \ - SccExtendedCharacter.find(self.value) - return None + """Returns the SCC code if any""" + return self.code def is_code(self) -> bool: """Returns true if the word is an SCC code, i.e. the first byte is a non-printing character in the range 10h to 1Fh.""" return 0x10 <= self.byte_1 <= 0x1F + + def get_channel(self) -> Optional[SccChannel]: + """Returns the caption channel, if the word is an SCC code""" + if self.is_code(): + if isinstance(self.code, SccPreambleAddressCode): + return self.code.get_channel() + return self.code.get_channel(self.value) + return None diff --git a/src/test/python/test_scc_pacs.py b/src/test/python/test_scc_pacs.py index bc8acb8e..8da06ea4 100644 --- a/src/test/python/test_scc_pacs.py +++ b/src/test/python/test_scc_pacs.py @@ -75,7 +75,7 @@ def test_scc_pac_values(self): self.assertIsNone(SccPreambleAddressCode.find(b1, b2)) def check_scc_pac_attributes(self, pac, channel, row, indent, color, font_style, text_decoration): - self.assertEqual(channel, pac.get_channel()) + self.assertEqual(channel, pac.get_channel().value) self.assertEqual(row, pac.get_row()) self.assertEqual(indent, pac.get_indent()) self.assertEqual(color, pac.get_color()) diff --git a/src/test/python/test_scc_reader.py b/src/test/python/test_scc_reader.py index 76f647e7..0b7aaf01 100644 --- a/src/test/python/test_scc_reader.py +++ b/src/test/python/test_scc_reader.py @@ -1490,6 +1490,42 @@ def test_scc_content_trying_to_roll_up_pop_on_paragraph(self): self.check_caption(p_list[1], "caption2", "11:19:30:20", None, ">>> Narrateur: Cette") self.assertEqual(region_2, p_list[1].get_region()) + def test_skipping_channel_2_content(self): + scc_content = """\ +01:03:27:29 1c20 1cd0 a843 4332 2920 1c2c 94ae 94ae 9420 9420 94f2 94f2 c845 d92c 2054 c845 5245 ae80 942c 942c 8080 8080 942f 942f +""" + expected_disassembly = """\ +01:03:27:29 {CC2|RCL}{CC2|1400}(CC2) {CC2|EDM}{CC1|ENM}{CC1|ENM}{CC1|RCL}{CC1|RCL}{CC1|1504}{CC1|1504}HEY, THERE.{CC1|EDM}{CC1|EDM}{}{}{CC1|EOC}{CC1|EOC} +""" + + disassembly = to_disassembly(scc_content, show_channels=True) + self.assertEqual(expected_disassembly, disassembly) + + doc = to_model(scc_content) + + self.assertIsNotNone(doc) + + region_1 = doc.get_region("pop1") + self.assertIsNotNone(region_1) + self.check_region_origin(region_1, 8, 16, doc.get_cell_resolution()) + self.check_region_extent(region_1, 11, 1, doc.get_cell_resolution()) + self.check_element_style(region_1, StyleProperties.DisplayAlign, DisplayAlignType.before) + self.check_element_style(region_1, StyleProperties.ShowBackground, ShowBackgroundType.whenActive) + + body = doc.get_body() + self.assertIsNotNone(body) + + div_list = list(body) + self.assertEqual(1, len(div_list)) + div = div_list[0] + self.assertIsNotNone(div) + + p_list = list(div) + self.assertEqual(1, len(p_list)) + + self.check_caption(p_list[0], "caption1", "01:03:28:18", None, "HEY, THERE.") + self.assertEqual(region_1, p_list[0].get_region()) + if __name__ == '__main__': unittest.main()