Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Misc #409

Closed
wants to merge 5 commits into from
Closed

Misc #409

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,16 @@ Default: `false`

Default: `false`

#### disable_ebu_style

`"disable_ebu_style" : true | false`

`true` means that the STL reader does not apply the EBU style mapping described in EBU Tech 3360 (cell resolution, font size and line height are left unset)

Implies `disable_line_padding`

Default: `false`

#### program_start_tc

`"program_start_tc" : "TCP" | "HH:MM:SS:FF"`
Expand All @@ -163,14 +173,22 @@ Overrides the font stack

Default: `"Verdana, Arial, Tiresias, sansSerif"`

#### ax_row_count
#### max_row_count

`"max_row_count" : "MNR" | integer`

Specifies a maximum number of rows for open subtitles, either the MNR field of the GSI block or a user-specified value

Default: `23`

#### force_bottom

`"force_bottom" : true | false`

Overrides the line positions specified in the STL file and forces all subtitles to be aligned to the bottom

Default: `false`

### SRT Writer configuration

#### text_formatting
Expand All @@ -191,6 +209,14 @@ Default: `true`

Default: `false`

#### text_position

`"text_position" : true | false`

`true` means that the VTT writer outputs text alignment cue settings

Default: `false`

#### cue_id

`"cue_id" : true | false`
Expand Down Expand Up @@ -271,9 +297,11 @@ Unit test code coverage is provided by the script at `scripts/coverage.sh`

Automated testing is provided by the script at `scripts/ci.sh`

Before first use, run `git submodule update --init` to get some required TTML samples from the W3C

#### Local

Run `./scripts/ci.sh`
Run `PYTHONPATH=src/main/python scripts/ci.sh`

#### GitHub actions

Expand Down
4 changes: 4 additions & 0 deletions src/main/python/ttconv/stl/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,13 @@ class STLReaderConfiguration(ModuleConfiguration):
disable_fill_line_gap: bool = field(default=False, metadata={"decoder": bool})
program_start_tc: typing.Optional[str] = field(default=None, metadata={"decoder": _decode_start_tc})
disable_line_padding: bool = field(default=False, metadata={"decoder": bool})
disable_ebu_style: bool = field(default=False, metadata={"decoder": bool})
if disable_ebu_style:
disable_line_padding=True
font_stack: typing.Optional[typing.Tuple[typing.Union[str, styles.GenericFontFamilyType]]] = \
field(default=None, metadata={"decoder": _decode_font_stack})
max_row_count: typing.Optional[typing.Union[int, str]] = field(default=None, metadata={"decoder": _decode_max_row_count})
force_bottom: bool = field(default=False, metadata={"decoder": bool})

@classmethod
def name(cls):
Expand Down
58 changes: 39 additions & 19 deletions src/main/python/ttconv/stl/datafile.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,9 +247,11 @@ def __init__(
gsi_block: bytes,
disable_fill_line_gap: bool = False,
disable_line_padding: bool = False,
disable_ebu_style: bool = False,
start_tc: typing.Optional[str] = None,
font_stack: typing.Tuple[typing.Union[str, styles.GenericFontFamilyType]] = None,
max_row_count: typing.Optional[typing.Union[int, str]] = None
max_row_count: typing.Optional[typing.Union[int, str]] = None,
force_bottom: bool = False,
):

self.gsi = _GSIBlock._make(
Expand All @@ -260,12 +262,13 @@ def __init__(

self.doc = model.ContentDocument()

self.doc.set_cell_resolution(
model.CellResolutionType(
columns=round(100 * DEFAULT_TELETEXT_COLS / (100 - 2 * DEFAULT_HORIZONTAL_SAFE_MARGIN_PCT)),
rows=round(100 * DEFAULT_TELETEXT_ROWS / (100 - 2 * DEFAULT_VERTICAL_SAFE_MARGIN_PCT))
if not disable_ebu_style:
self.doc.set_cell_resolution(
model.CellResolutionType(
columns=round(100 * DEFAULT_TELETEXT_COLS / (100 - 2 * DEFAULT_HORIZONTAL_SAFE_MARGIN_PCT)),
rows=round(100 * DEFAULT_TELETEXT_ROWS / (100 - 2 * DEFAULT_VERTICAL_SAFE_MARGIN_PCT))
)
)
)

self.doc.set_active_area(
model.ActiveAreaType(
Expand Down Expand Up @@ -374,6 +377,9 @@ def __init__(
else:
self.max_row_count = max_row_count

self.disable_ebu_style = disable_ebu_style
self.force_bottom = force_bottom

# p_element for use across cumulative subtitles
self.cur_p_element = None

Expand Down Expand Up @@ -499,32 +505,46 @@ def process_tti_block(self, tti_block: bytes):
else:
self.cur_p_element.set_style(styles.StyleProperties.TextAlign, styles.TextAlignType.center)

self.cur_p_element.set_style(
styles.StyleProperties.LineHeight,
styles.LengthType(DEFAULT_LINE_HEIGHT_PCT,
styles.LengthType.Units.pct)
)

if self.is_teletext() and not is_double_height_characters:
font_size = DEFAULT_SINGLE_HEIGHT_FONT_SIZE_PCT
else:
font_size = DEFAULT_DOUBLE_HEIGHT_FONT_SIZE_PCT

self.cur_p_element.set_style(
styles.StyleProperties.FontSize,
styles.LengthType(
font_size,
styles.LengthType.Units.pct
if not self.disable_ebu_style:
self.cur_p_element.set_style(
styles.StyleProperties.FontSize,
styles.LengthType(
font_size,
styles.LengthType.Units.pct
)
)
self.cur_p_element.set_style(
styles.StyleProperties.LineHeight,
styles.LengthType(DEFAULT_LINE_HEIGHT_PCT,
styles.LengthType.Units.pct)
)
)

safe_area_height = round(100 - DEFAULT_VERTICAL_SAFE_MARGIN_PCT * 2)
safe_area_width = round(100 - DEFAULT_HORIZONTAL_SAFE_MARGIN_PCT * 2)

if self.force_bottom:
# single region of the full safe area
r_y = DEFAULT_VERTICAL_SAFE_MARGIN_PCT
r_height = 100 - r_y

region = _get_region_from_model(
self.doc,
round(DEFAULT_HORIZONTAL_SAFE_MARGIN_PCT),
r_y,
safe_area_width,
r_height,
styles.DisplayAlignType.after
)

# assume that VP < max number of rows/2 means top-aligned and otherwise bottom-aligned
# probably should offer an option to override this

if tti.VP < self.get_max_row_count() // 2:
elif tti.VP < self.get_max_row_count() // 2:
# top-aligned large region

r_y = DEFAULT_VERTICAL_SAFE_MARGIN_PCT + ((tti.VP - 1) / self.get_max_row_count()) * safe_area_height
Expand Down
4 changes: 3 additions & 1 deletion src/main/python/ttconv/stl/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ def to_model(data_file: typing.IO, config: typing.Optional[STLReaderConfiguratio
data_file.read(1024),
disable_fill_line_gap=False if config is None else config.disable_fill_line_gap,
disable_line_padding=False if config is None else config.disable_line_padding,
disable_ebu_style=False if config is None else config.disable_ebu_style,
start_tc=None if config is None else config.program_start_tc,
font_stack=None if config is None else config.font_stack,
max_row_count=None if config is None else config.max_row_count
max_row_count=None if config is None else config.max_row_count,
force_bottom=False if config is None else config.force_bottom
)

for i in itertools.count():
Expand Down
2 changes: 1 addition & 1 deletion src/main/python/ttconv/tt.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def convert(args):
#
# Write out the converted file
#
tree_from_model.write(outputfile, encoding="utf-8")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The XML declaration is optional in XML 1.0, which is the default for IMSC and TTML.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, I don't really know the standards; I check the TTML with https://github.com/skynav/ttt
The result is that, without the XML declaration, I get encoding errors on some diacritical marks and French characters (ç ...).
Is UTF-8 the default when no XML declaration exists? If so, do you think this is a ttt issue, where it fails to default to UTF-8? Or should characters that need a particular encoding be detected to trigger the XML declaration?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ngaullier Can you provide a sample file that fails TTV validation? Looking at code I wrote that uses TTV, I force UTF-8:

      args.add("--force-encoding");
      args.add("UTF-8");

There is also a known issue with BOM (skynav/ttt#193) but it is probably not relevant here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

samplefrench.zip
PYTHONPATH=src/main/python python3 src/main/python/ttconv/tt.py convert -i /mnt/d/samplefrench.stl -o /mnt/d/samplefrench.ttml

Timed Text Verifier (TTV) [7.2-SNAPSHOT] Copyright (c) 2013-21 Skynav, Inc.
[E]:Malformed US-ASCII at byte offset 815 of one byte.

On "é" of métisse:
10:00:45:06-10:00:47:07 SGN.SN.EBN.CS.VP=00.0012.FF.00.20C
[DOUBLE HEIGHT][ALPHA CYAN] <<qui est métisse,>>|| (30)

Yes, I also experienced the TTV/BOM issue, but personally I don't like BOMs anyway!

tree_from_model.write(outputfile, encoding="utf-8", xml_declaration=True)

elif writer_type is FileTypes.SRT:
#
Expand Down
3 changes: 3 additions & 0 deletions src/main/python/ttconv/vtt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,8 @@ def name(cls):
# outputs `line` and `line alignment` cue settings
line_position: bool = field(default=False, metadata={"decoder": bool})

# outputs `text alignment` cue settings
text_position: bool = field(default=False, metadata={"decoder": bool})

# outputs cue identifier
cue_id: bool = field(default=True, metadata={"decoder": bool})
15 changes: 15 additions & 0 deletions src/main/python/ttconv/vtt/cue.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ class LineAlignment(Enum):
center = "center"
end = "end"

class TextAlignment(Enum):
"""WebVTT text alignment cue setting"""
left = "left"
middle = "middle"
right = "right"

_EOL_SEQ_RE = re.compile(r"\n{2,}")

def __init__(self, identifier: Optional[int] = None):
Expand All @@ -51,6 +57,7 @@ def __init__(self, identifier: Optional[int] = None):
self._text: str = ""
self._line: int = None
self._align: VttCue.LineAlignment = None
self._textalign: VttCue.TextAlignment = None

def set_begin(self, offset: Fraction):
"""Sets the paragraph begin time code"""
Expand Down Expand Up @@ -89,6 +96,10 @@ def get_align(self) -> Optional[LineAlignment]:
"""Return the WebVTT line alignment cue setting"""
return self._align

def set_textalign(self, textalign: TextAlignment):
"""Sets the WebVTT text alignment cue setting"""
self._textalign = textalign

def is_only_whitespace_or_empty(self):
"""Returns whether the paragraph text contains only whitespace or is empty"""
return len(self._text) == 0 or self._text.isspace()
Expand Down Expand Up @@ -123,6 +134,10 @@ def __str__(self) -> str:
# cue timing
t += f"{self._begin} --> {self._end}"

# cue text position
if self._textalign is not None:
t += f" align:{self._textalign.value}"

# cue line position
if self._line is not None:
t += f" line:{self._line}%"
Expand Down
18 changes: 16 additions & 2 deletions src/main/python/ttconv/vtt/writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from ttconv.isd import ISD
from ttconv.vtt.cue import VttCue
from ttconv.vtt.css_class import CssClass
from ttconv.style_properties import ExtentType, PositionType, StyleProperties, FontStyleType, NamedColors, FontWeightType, TextDecorationType, DisplayAlignType
from ttconv.style_properties import ExtentType, PositionType, StyleProperties, FontStyleType, NamedColors, FontWeightType, TextDecorationType, DisplayAlignType, TextAlignType

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -84,7 +84,12 @@ def __init__(self, config: VTTWriterConfiguration):
StyleProperties.DisplayAlign: [],
StyleProperties.Extent: [],
})


if self._config.text_position:
supported_styles.update({
StyleProperties.TextAlign: [],
})

self._filters.append(SupportedStylePropertiesFilter(supported_styles))

self._filters.append(
Expand Down Expand Up @@ -175,6 +180,15 @@ def process_p(self, region: ISD.Region, element: model.P, begin: Fraction, end:
cue.set_line(round(position.v_offset.value + extent.height.value / 2))
cue.set_align(VttCue.LineAlignment.center)

if self._config.text_position:
text_align = element.get_style(StyleProperties.TextAlign)
if text_align == TextAlignType.center:
cue.set_textalign(VttCue.TextAlignment.middle)
elif text_align == TextAlignType.start:
cue.set_textalign(VttCue.TextAlignment.left)
elif text_align == TextAlignType.end:
cue.set_textalign(VttCue.TextAlignment.right)

self._paragraphs.append(cue)

for elem in list(element):
Expand Down
54 changes: 49 additions & 5 deletions src/test/python/test_vtt_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,21 +114,25 @@ def test_position(self):
<head>
<styling>
<style xml:id="style.center" tts:fontFamily="Arial" tts:fontSize="100%" tts:fontStyle="normal" tts:fontWeight="normal" tts:backgroundColor="transparent" tts:color="white" tts:textAlign="center"/>
<style xml:id="style.left" tts:fontFamily="Arial" tts:fontSize="100%" tts:fontStyle="normal" tts:fontWeight="normal" tts:backgroundColor="transparent" tts:color="white" tts:textAlign="left"/>
<style xml:id="style.right" tts:fontFamily="Arial" tts:fontSize="100%" tts:fontStyle="normal" tts:fontWeight="normal" tts:backgroundColor="transparent" tts:color="white" tts:textAlign="right"/>
</styling>
<layout>
<region xml:id="region.after" tts:displayAlign="after" tts:backgroundColor="transparent" tts:origin="10% 10%" tts:extent="80% 80%"/>
<region xml:id="region.before" tts:displayAlign="before" tts:backgroundColor="transparent" tts:origin="10% 10%" tts:extent="80% 80%"/>
<region xml:id="region.center" tts:displayAlign="center" tts:backgroundColor="transparent" tts:origin="10% 10%" tts:extent="80% 80%"/>
</layout>
</head>
<body>
<div>
<p style="style.center" region="region.after" begin="00:00:03:12" end="00:00:12:00">Only one or two short samples are needed<br/>to make sure the conversion basically works</p>
<p style="style.center" region="region.before" begin="00:00:14:09" end="00:00:25:17">Cool, got it, will do it by end of next week.</p>
<p style="style.left" region="region.before" begin="00:00:14:09" end="00:00:25:17">Cool, got it, will do it by end of next week.</p>
<p style="style.right" region="region.center" begin="00:00:26:00" end="00:00:28:00">Yes.</p>
</div>
</body>
</tt>"""

expected_vtt="""WEBVTT
expected_vtt_line="""WEBVTT

1
00:00:03.501 --> 00:00:12.000 line:90%,end
Expand All @@ -138,17 +142,57 @@ def test_position(self):
2
00:00:14.375 --> 00:00:25.709 line:10%,start
Cool, got it, will do it by end of next week.

3
00:00:26.000 --> 00:00:28.000 line:50%,center
Yes.
"""
expected_vtt_align="""WEBVTT

1
00:00:03.501 --> 00:00:12.000 align:middle
Only one or two short samples are needed
to make sure the conversion basically works

2
00:00:14.375 --> 00:00:25.709 align:left
Cool, got it, will do it by end of next week.

3
00:00:26.000 --> 00:00:28.000 align:right
Yes.
"""
expected_vtt_line_and_align="""WEBVTT

1
00:00:03.501 --> 00:00:12.000 align:middle line:90%,end
Only one or two short samples are needed
to make sure the conversion basically works

2
00:00:14.375 --> 00:00:25.709 align:left line:10%,start
Cool, got it, will do it by end of next week.

3
00:00:26.000 --> 00:00:28.000 align:right line:50%,center
Yes.
"""

model = imsc_reader.to_model(et.ElementTree(et.fromstring(ttml_doc_str)))
config = VTTWriterConfiguration()
config.line_position = True
vtt_from_model = vtt_writer.from_model(model, config)
self.assertEqual(expected_vtt, vtt_from_model)
self.assertEqual(expected_vtt_line, vtt_from_model)
config.text_position = True
vtt_from_model = vtt_writer.from_model(model, config)
self.assertEqual(expected_vtt_line_and_align, vtt_from_model)
config.line_position = False
vtt_from_model = vtt_writer.from_model(model, config)
self.assertEqual(expected_vtt_align, vtt_from_model)

config = VTTWriterConfiguration.parse(json.loads('{"line_position":true}'))
config = VTTWriterConfiguration.parse(json.loads('{"line_position":true, "text_position":true}'))
vtt_from_model = vtt_writer.from_model(model, config)
self.assertEqual(expected_vtt, vtt_from_model)
self.assertEqual(expected_vtt_line_and_align, vtt_from_model)

def test_cue_id(self):
ttml_doc_str = """<?xml version="1.0" encoding="UTF-8"?>
Expand Down
Loading