diff --git a/.gitignore b/.gitignore
index f8e1a654..fac9db78 100644
--- a/.gitignore
+++ b/.gitignore
@@ -38,6 +38,7 @@ docs/_build
# Environments
env/
venv/
+.env
# Pyenv files
.python-version
diff --git a/docs/conf.py b/docs/conf.py
index 479cdee8..77990294 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -32,30 +32,30 @@
extensions = []
# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
# The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = ".rst"
# The encoding of source files.
# source_encoding = 'utf-8-sig'
# The master toctree document.
-master_doc = 'index'
+master_doc = "index"
# General information about the project.
-project = 'pycaption'
-copyright = '2012-2026, PBS.org ' \
- '(available under the Apache License, Version 2.0)'
+project = "pycaption"
+copyright = "2012-2026, PBS.org " \
+ "(available under the Apache License, Version 2.0)"
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
-version = '2.2.20'
+version = "2.2.20"
# The full version, including alpha/beta/rc tags.
-release = '2.2.20'
+release = "2.2.20"
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@@ -69,7 +69,7 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
-exclude_patterns = ['_build']
+exclude_patterns = ["_build"]
# The reST default role (used for this markup: `text`) to use for all
# documents.
@@ -87,7 +87,7 @@
# show_authors = False
# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
# A list of ignored prefixes for module index sorting.
# modindex_common_prefix = []
@@ -131,7 +131,7 @@
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
@@ -180,7 +180,7 @@
# html_file_suffix = None
# Output file base name for HTML help builder.
-htmlhelp_basename = 'pycaptiondoc'
+htmlhelp_basename = "pycaptiondoc"
# -- Options for LaTeX output ---------------------------------------------
@@ -188,10 +188,8 @@
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
# 'papersize': 'letterpaper',
-
# The font size ('10pt', '11pt' or '12pt').
# 'pointsize': '10pt',
-
# Additional stuff for the LaTeX preamble.
# 'preamble': '',
}
@@ -200,8 +198,7 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
- ('index', 'pycaption.tex', 'pycaption Documentation',
- 'PBS', 'manual'),
+ ("index", "pycaption.tex", "pycaption Documentation", "PBS", "manual"),
]
# The name of an image file (relative to this directory) to place at the top of
@@ -229,10 +226,7 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
-man_pages = [
- ('index', 'pycaption', 'pycaption Documentation',
- ['PBS'], 1)
-]
+man_pages = [("index", "pycaption", "pycaption Documentation", ["PBS"], 1)]
# If true, show URL addresses after external links.
# man_show_urls = False
@@ -244,9 +238,15 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
- ('index', 'pycaption', 'pycaption Documentation',
- 'PBS', 'pycaption', 'One line description of project.',
- 'Miscellaneous'),
+ (
+ "index",
+ "pycaption",
+ "pycaption Documentation",
+ "PBS",
+ "pycaption",
+ "One line description of project.",
+ "Miscellaneous",
+ ),
]
# Documents to append as an appendix to all manuals.
diff --git a/pycaption/__init__.py b/pycaption/__init__.py
index adc9b501..3e60702f 100644
--- a/pycaption/__init__.py
+++ b/pycaption/__init__.py
@@ -1,30 +1,52 @@
-from .base import (
- CaptionConverter, CaptionNode, Caption, CaptionList, CaptionSet,
+from .base import Caption, CaptionConverter, CaptionList, CaptionNode, CaptionSet
+from .dfxp import DFXPReader, DFXPWriter
+from .exceptions import (
+ CaptionLineLengthError,
+ CaptionReadError,
+ CaptionReadNoCaptions,
+ CaptionReadSyntaxError,
)
-from .dfxp import DFXPWriter, DFXPReader
from .microdvd import MicroDVDReader, MicroDVDWriter
from .sami import SAMIReader, SAMIWriter
-from .srt import SRTReader, SRTWriter
from .scc import SCCReader, SCCWriter
from .scc.translator import translate_scc
+from .srt import SRTReader, SRTWriter
from .transcript import TranscriptWriter
from .webvtt import WebVTTReader, WebVTTWriter
-from .exceptions import (
- CaptionReadError, CaptionReadNoCaptions, CaptionReadSyntaxError, CaptionLineLengthError
-)
-
__all__ = [
- 'CaptionConverter', 'DFXPReader', 'DFXPWriter', 'MicroDVDReader',
- 'MicroDVDWriter', 'SAMIReader', 'SAMIWriter', 'SRTReader', 'SRTWriter',
- 'SCCReader', 'SCCWriter', 'translate_scc', 'WebVTTReader', 'WebVTTWriter',
- 'CaptionReadError', 'CaptionReadNoCaptions', 'CaptionReadSyntaxError',
- 'detect_format', 'CaptionNode', 'Caption', 'CaptionList', 'CaptionSet',
- 'TranscriptWriter'
+ "CaptionConverter",
+ "DFXPReader",
+ "DFXPWriter",
+ "MicroDVDReader",
+ "MicroDVDWriter",
+ "SAMIReader",
+ "SAMIWriter",
+ "SRTReader",
+ "SRTWriter",
+ "SCCReader",
+ "SCCWriter",
+ "translate_scc",
+ "WebVTTReader",
+ "WebVTTWriter",
+ "CaptionReadError",
+ "CaptionReadNoCaptions",
+ "CaptionReadSyntaxError",
+ "detect_format",
+ "CaptionNode",
+ "Caption",
+ "CaptionList",
+ "CaptionSet",
+ "TranscriptWriter",
]
SUPPORTED_READERS = (
- DFXPReader, MicroDVDReader, WebVTTReader, SAMIReader, SRTReader, SCCReader,
+ DFXPReader,
+ MicroDVDReader,
+ WebVTTReader,
+ SAMIReader,
+ SRTReader,
+ SCCReader,
)
@@ -36,7 +58,7 @@ def detect_format(caps):
"""
if not len(caps):
raise CaptionReadNoCaptions("Empty caption file")
-
+
for reader in SUPPORTED_READERS:
if reader().detect(caps):
return reader
diff --git a/pycaption/dfxp/__init__.py b/pycaption/dfxp/__init__.py
index 75d48474..aa19ca9c 100644
--- a/pycaption/dfxp/__init__.py
+++ b/pycaption/dfxp/__init__.py
@@ -1,2 +1,2 @@
from .base import * # noqa: F401, F403
-from .extras import SinglePositioningDFXPWriter, LegacyDFXPWriter # noqa: F401
+from .extras import LegacyDFXPWriter, SinglePositioningDFXPWriter # noqa: F401
diff --git a/pycaption/dfxp/base.py b/pycaption/dfxp/base.py
index bef05864..b6facc97 100644
--- a/pycaption/dfxp/base.py
+++ b/pycaption/dfxp/base.py
@@ -5,25 +5,43 @@
from bs4 import BeautifulSoup, NavigableString
from ..base import (
- BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode,
DEFAULT_LANGUAGE_CODE,
+ BaseReader,
+ BaseWriter,
+ Caption,
+ CaptionList,
+ CaptionNode,
+ CaptionSet,
)
from ..exceptions import (
- CaptionReadNoCaptions, CaptionReadSyntaxError, InvalidInputError,
+ CaptionReadNoCaptions,
+ CaptionReadSyntaxError,
CaptionReadTimingError,
+ InvalidInputError,
)
from ..geometry import (
- Point, Stretch, UnitEnum, Padding, VerticalAlignmentEnum,
- HorizontalAlignmentEnum, Alignment, Layout,
+ Alignment,
+ HorizontalAlignmentEnum,
+ Layout,
+ Padding,
+ Point,
+ Stretch,
+ UnitEnum,
+ VerticalAlignmentEnum,
)
from ..utils import is_leaf
__all__ = [
- 'DFXP_BASE_MARKUP', 'DFXP_DEFAULT_STYLE', 'DFXP_DEFAULT_STYLE_ID',
- 'DFXP_DEFAULT_REGION_ID', 'DFXPReader', 'DFXPWriter', 'DFXP_DEFAULT_REGION'
+ "DFXP_BASE_MARKUP",
+ "DFXP_DEFAULT_STYLE",
+ "DFXP_DEFAULT_STYLE_ID",
+ "DFXP_DEFAULT_REGION_ID",
+ "DFXPReader",
+ "DFXPWriter",
+ "DFXP_DEFAULT_REGION",
]
-DFXP_BASE_MARKUP = '''
+DFXP_BASE_MARKUP = """
@@ -32,36 +50,35 @@
-'''
+"""
DFXP_DEFAULT_STYLE = {
- 'color': 'white',
- 'font-family': 'monospace',
- 'font-size': '1c',
+ "color": "white",
+ "font-family": "monospace",
+ "font-size": "1c",
}
DFXP_DEFAULT_REGION = Layout(
- alignment=Alignment(
- HorizontalAlignmentEnum.START, VerticalAlignmentEnum.BOTTOM)
+ alignment=Alignment(HorizontalAlignmentEnum.START, VerticalAlignmentEnum.BOTTOM)
)
-DFXP_DEFAULT_STYLE_ID = 'default'
-DFXP_DEFAULT_REGION_ID = 'bottom'
+DFXP_DEFAULT_STYLE_ID = "default"
+DFXP_DEFAULT_REGION_ID = "bottom"
CLOCK_TIME_PATTERN = (
- r'(?P nodes matter)
# On elements like <span> it is also read, because this was legacy
# behavior.
- if getattr(element, 'name', None) in ('span', 'p'):
+ if getattr(element, "name", None) in ("span", "p"):
text_align_source = element
else:
text_align_source = None
- text_align = (
- self._find_attribute(text_align_source, 'tts:textAlign')
- or _create_external_horizontal_alignment(
+ text_align = self._find_attribute(
+ text_align_source, "tts:textAlign"
+ ) or _create_external_horizontal_alignment(
DFXP_DEFAULT_REGION.alignment.horizontal
)
- )
- display_align = (
- self._find_attribute(usable_elem, 'tts:displayAlign')
- or _create_external_vertical_alignment(
- DFXP_DEFAULT_REGION.alignment.vertical
- )
- )
+ display_align = self._find_attribute(
+ usable_elem, "tts:displayAlign"
+ ) or _create_external_vertical_alignment(DFXP_DEFAULT_REGION.alignment.vertical)
alignment = _create_internal_alignment(text_align, display_align)
return origin, extent, padding, alignment
- def _find_attribute_on_element_or_styles(self, attribute_name, element,
- factory, ignore, ignorecase):
+ def _find_attribute_on_element_or_styles(
+ self, attribute_name, element, factory, ignore, ignorecase
+ ):
"""Look up the given attribute on the element, and all the styles
referenced by it.
@@ -886,8 +914,7 @@ def _find_attribute_on_element_or_styles(self, attribute_name, element,
)
if value is None:
# Does a referenced style of the element have it?
- for style in self._get_style_sources(
- self._styling_section, element):
+ for style in self._get_style_sources(self._styling_section, element):
value = _get_object_from_attribute(
style, attribute_name, factory, ignore, ignorecase
)
@@ -895,8 +922,9 @@ def _find_attribute_on_element_or_styles(self, attribute_name, element,
break
return value
- def _find_attribute(self, element, attribute_name, factory=lambda x: x,
- ignore=(), ignorecase=True):
+ def _find_attribute(
+ self, element, attribute_name, factory=lambda x: x, ignore=(), ignorecase=True
+ ):
"""Try to find the `attribute_name` specified on the element, all its
parents and all their styles (and referenced styles).
@@ -918,7 +946,8 @@ def _find_attribute(self, element, attribute_name, factory=lambda x: x,
# Does the element itself have it inline, or any of its styles?
if element:
value = self._find_attribute_on_element_or_styles(
- attribute_name, element, factory, ignore, ignorecase)
+ attribute_name, element, factory, ignore, ignorecase
+ )
if value is None:
# Do any of the element's parents have the attribute?
@@ -952,7 +981,7 @@ def _find_root_extent(self):
if extent is None:
root = self.root_element
extent = _get_object_from_attribute(
- root, 'tts:extent', Stretch.from_xml_attribute
+ root, "tts:extent", Stretch.from_xml_attribute
)
if extent is not None:
@@ -1047,15 +1076,18 @@ def _create_unique_regions(unique_layouts, dfxp, id_factory):
:rtype: dict
"""
region_map = {}
- layout_section = dfxp.find('layout')
+ layout_section = dfxp.find("layout")
for region_spec in unique_layouts:
if (
- region_spec.origin or region_spec.extent
- or region_spec.padding or region_spec.alignment):
- new_region = dfxp.new_tag('region')
+ region_spec.origin
+ or region_spec.extent
+ or region_spec.padding
+ or region_spec.alignment
+ ):
+ new_region = dfxp.new_tag("region")
new_id = id_factory()
- new_region['xml:id'] = new_id
+ new_region["xml:id"] = new_id
region_map[region_spec] = new_id
region_attribs = _convert_layout_to_attributes(region_spec)
@@ -1071,29 +1103,31 @@ def create_document_regions(self):
"""
# Creates the default region
default_region_map = self._create_unique_regions(
- [DFXP_DEFAULT_REGION],
- self._dfxp, lambda: DFXP_DEFAULT_REGION_ID
+ [DFXP_DEFAULT_REGION], self._dfxp, lambda: DFXP_DEFAULT_REGION_ID
)
unique_regions = self._collect_unique_regions(
- self._caption_set, DFXP_DEFAULT_REGION)
+ self._caption_set, DFXP_DEFAULT_REGION
+ )
# Create the document specified regions
self._region_map = self._create_unique_regions(
- unique_regions, self._dfxp, self._get_new_id)
+ unique_regions, self._dfxp, self._get_new_id
+ )
self._region_map.update(default_region_map)
- def _get_new_id(self, prefix='r'):
+ def _get_new_id(self, prefix="r"):
"""Return new, unique ids (use an internal counter).
:type prefix: str
"""
- new_id = f'{prefix}{self._id_seed}'
+ new_id = f"{prefix}{self._id_seed}"
self._id_seed += 1
return new_id
def get_positioning_info(
- self, lang, caption_set=None, caption=None, caption_node=None):
+ self, lang, caption_set=None, caption=None, caption_node=None
+ ):
"""For the given element will return a valid region ID, used for
assigning to the element, and a dict containing the positioning
attributes of that region (useful for inline non-standard positioning)
@@ -1147,37 +1181,37 @@ def get_positioning_info(
def cleanup_regions(self):
"""Remove the unused regions from the output file"""
- layout_tag = self._dfxp.find('layout')
+ layout_tag = self._dfxp.find("layout")
if not layout_tag:
return
- regions = layout_tag.findChildren('region')
+ regions = layout_tag.findChildren("region")
if not regions:
return
for region in regions:
- if region.attrs.get('xml:id') not in self._assigned_region_ids:
+ if region.attrs.get("xml:id") not in self._assigned_region_ids:
region.extract()
def _recreate_style(content, dfxp):
dfxp_style = {}
- if 'class' in content:
- if dfxp.find("style", {"xml:id": content['class']}):
- dfxp_style['style'] = content['class']
- if 'text-align' in content:
- dfxp_style['tts:textAlign'] = content['text-align']
- if 'italics' in content:
- dfxp_style['tts:fontStyle'] = 'italic'
- if 'font-family' in content:
- dfxp_style['tts:fontFamily'] = content['font-family']
- if 'font-size' in content:
- dfxp_style['tts:fontSize'] = content['font-size']
- if 'color' in content:
- dfxp_style['tts:color'] = content['color']
- if 'display-align' in content:
- dfxp_style['tts:displayAlign'] = content['display-align']
+ if "class" in content:
+ if dfxp.find("style", {"xml:id": content["class"]}):
+ dfxp_style["style"] = content["class"]
+ if "text-align" in content:
+ dfxp_style["tts:textAlign"] = content["text-align"]
+ if "italics" in content:
+ dfxp_style["tts:fontStyle"] = "italic"
+ if "font-family" in content:
+ dfxp_style["tts:fontFamily"] = content["font-family"]
+ if "font-size" in content:
+ dfxp_style["tts:fontSize"] = content["font-size"]
+ if "color" in content:
+ dfxp_style["tts:color"] = content["color"]
+ if "display-align" in content:
+ dfxp_style["tts:displayAlign"] = content["display-align"]
return dfxp_style
@@ -1204,8 +1238,7 @@ def _create_internal_alignment(text_align, display_align):
if not (text_align or display_align):
return None
- return Alignment.from_horizontal_and_vertical_align(
- text_align, display_align)
+ return Alignment.from_horizontal_and_vertical_align(text_align, display_align)
def _create_external_horizontal_alignment(horizontal_component):
@@ -1218,15 +1251,15 @@ def _create_external_horizontal_alignment(horizontal_component):
result = None
if horizontal_component == HorizontalAlignmentEnum.LEFT:
- result = 'left'
+ result = "left"
if horizontal_component == HorizontalAlignmentEnum.CENTER:
- result = 'center'
+ result = "center"
if horizontal_component == HorizontalAlignmentEnum.RIGHT:
- result = 'right'
+ result = "right"
if horizontal_component == HorizontalAlignmentEnum.START:
- result = 'start'
+ result = "start"
if horizontal_component == HorizontalAlignmentEnum.END:
- result = 'end'
+ result = "end"
return result
@@ -1241,11 +1274,11 @@ def _create_external_vertical_alignment(vertical_component):
result = None
if vertical_component == VerticalAlignmentEnum.TOP:
- result = 'before'
+ result = "before"
if vertical_component == VerticalAlignmentEnum.CENTER:
- result = 'center'
+ result = "center"
if vertical_component == VerticalAlignmentEnum.BOTTOM:
- result = 'after'
+ result = "after"
return result
@@ -1265,21 +1298,20 @@ def _create_external_alignment(alignment):
if not (alignment.horizontal or alignment.vertical):
return result
- horizontal_alignment = _create_external_horizontal_alignment(
- alignment.horizontal)
+ horizontal_alignment = _create_external_horizontal_alignment(alignment.horizontal)
if horizontal_alignment:
- result['tts:textAlign'] = horizontal_alignment
+ result["tts:textAlign"] = horizontal_alignment
- vertical_alignment = _create_external_vertical_alignment(
- alignment.vertical)
+ vertical_alignment = _create_external_vertical_alignment(alignment.vertical)
if vertical_alignment:
- result['tts:displayAlign'] = vertical_alignment
+ result["tts:displayAlign"] = vertical_alignment
return result
-def _get_object_from_attribute(tag, attr_name, factory,
- ignore_vals=(), ignorecase=True):
+def _get_object_from_attribute(
+ tag, attr_name, factory, ignore_vals=(), ignorecase=True
+):
"""For the xml `tag`, tries to retrieve the attribute `attr_name` and
pass that to the factory in order to get a result. If the value of the
attribute is in the `ignore_vals` iterable, returns None.
@@ -1291,7 +1323,7 @@ def _get_object_from_attribute(tag, attr_name, factory,
:param ignore_vals: iterable of attribute values to ignore
:raise CaptionReadSyntaxError: if the attribute has some crazy value
"""
- if not hasattr(tag, 'has_attr'):
+ if not hasattr(tag, "has_attr"):
return
attr_value = None
@@ -1329,13 +1361,13 @@ def _convert_layout_to_attributes(layout):
return _create_external_alignment(DFXP_DEFAULT_REGION.alignment)
if layout.origin:
- result['tts:origin'] = layout.origin.to_xml_attribute()
+ result["tts:origin"] = layout.origin.to_xml_attribute()
if layout.extent:
- result['tts:extent'] = layout.extent.to_xml_attribute()
+ result["tts:extent"] = layout.extent.to_xml_attribute()
if layout.padding:
- result['tts:padding'] = layout.padding.to_xml_attribute()
+ result["tts:padding"] = layout.padding.to_xml_attribute()
if layout.alignment:
result.update(_create_external_alignment(layout.alignment))
diff --git a/pycaption/dfxp/extras.py b/pycaption/dfxp/extras.py
index 70a60c82..dfc4fa7f 100644
--- a/pycaption/dfxp/extras.py
+++ b/pycaption/dfxp/extras.py
@@ -6,10 +6,10 @@
from bs4 import BeautifulSoup
-from .base import DFXPWriter, DFXP_DEFAULT_REGION
from ..base import BaseWriter, CaptionNode, merge_concurrent_captions
+from .base import DFXP_DEFAULT_REGION, DFXPWriter
-LEGACY_DFXP_BASE_MARKUP = '''
+LEGACY_DFXP_BASE_MARKUP = """
@@ -18,33 +18,30 @@
-'''
+"""
LEGACY_DFXP_DEFAULT_STYLE = {
- 'color': 'white',
- 'font-family': 'monospace',
- 'font-size': '1c',
+ "color": "white",
+ "font-family": "monospace",
+ "font-size": "1c",
}
-LEGACY_DFXP_DEFAULT_STYLE_ID = 'default'
-LEGACY_DFXP_DEFAULT_REGION_ID = 'bottom'
+LEGACY_DFXP_DEFAULT_STYLE_ID = "default"
+LEGACY_DFXP_DEFAULT_REGION_ID = "bottom"
-LEGACY_DFXP_DEFAULT_REGION = {
- 'text-align': 'center',
- 'display-align': 'after'
-}
+LEGACY_DFXP_DEFAULT_REGION = {"text-align": "center", "display-align": "after"}
class SinglePositioningDFXPWriter(DFXPWriter):
"""
A dfxp writer, that ignores all positioning, using a single provided value
"""
- def __init__(self, default_positioning=DFXP_DEFAULT_REGION,
- *args, **kwargs):
+
+ def __init__(self, default_positioning=DFXP_DEFAULT_REGION, *args, **kwargs):
super().__init__(*args, **kwargs)
self.default_positioning = default_positioning
- def write(self, captions_set, force=''):
+ def write(self, captions_set, force=""):
"""Writes a DFXP file using the positioning provided in the initializer
:type captions_set: pycaption.base.CaptionSet
@@ -52,7 +49,8 @@ def write(self, captions_set, force=''):
:rtype: str
"""
captions_set = self._create_single_positioning_caption_set(
- captions_set, self.default_positioning)
+ captions_set, self.default_positioning
+ )
return super().write(captions_set, force) # noqa
@@ -80,42 +78,45 @@ def _create_single_positioning_caption_set(caption_set, positioning):
caption.layout_info = positioning
for node in caption.nodes:
- if hasattr(node, 'layout_info'):
+ if hasattr(node, "layout_info"):
node.layout_info = positioning
for _, style in caption_set.get_styles():
- if 'text-align' in style:
- style.pop('text-align')
+ if "text-align" in style:
+ style.pop("text-align")
return caption_set
class LegacyDFXPWriter(BaseWriter):
"""Ported the legacy DFXPWriter from 0.4.5"""
+
def __init__(self, *args, **kw):
self.p_style = False
self.open_span = False
- def write(self, caption_set, force=''):
+ def write(self, caption_set, force=""):
caption_set = deepcopy(caption_set)
caption_set = merge_concurrent_captions(caption_set)
- dfxp = BeautifulSoup(LEGACY_DFXP_BASE_MARKUP, 'lxml-xml')
- dfxp.find('tt')['xml:lang'] = "en"
+ dfxp = BeautifulSoup(LEGACY_DFXP_BASE_MARKUP, "lxml-xml")
+ dfxp.find("tt")["xml:lang"] = "en"
for style_id, style in caption_set.get_styles():
if style != {}:
dfxp = self._recreate_styling_tag(style_id, style, dfxp)
if not caption_set.get_styles():
dfxp = self._recreate_styling_tag(
- LEGACY_DFXP_DEFAULT_STYLE_ID, LEGACY_DFXP_DEFAULT_STYLE, dfxp)
+ LEGACY_DFXP_DEFAULT_STYLE_ID, LEGACY_DFXP_DEFAULT_STYLE, dfxp
+ )
# XXX For now we will always use this default region. In the future if
# regions are provided, they will be kept
dfxp = self._recreate_region_tag(
- LEGACY_DFXP_DEFAULT_REGION_ID, LEGACY_DFXP_DEFAULT_REGION, dfxp)
+ LEGACY_DFXP_DEFAULT_REGION_ID, LEGACY_DFXP_DEFAULT_REGION, dfxp
+ )
- body = dfxp.find('body')
+ body = dfxp.find("body")
if force:
langs = [self._force_language(force, caption_set.get_languages())]
@@ -123,17 +124,18 @@ def write(self, caption_set, force=''):
langs = caption_set.get_languages()
for lang in langs:
- div = dfxp.new_tag('div')
- div['xml:lang'] = lang
+ div = dfxp.new_tag("div")
+ div["xml:lang"] = lang
for caption in caption_set.get_captions(lang):
if caption.style:
caption_style = caption.style
- caption_style.update(
- {'region': LEGACY_DFXP_DEFAULT_REGION_ID})
+ caption_style.update({"region": LEGACY_DFXP_DEFAULT_REGION_ID})
else:
- caption_style = {'class': LEGACY_DFXP_DEFAULT_STYLE_ID,
- 'region': LEGACY_DFXP_DEFAULT_REGION_ID}
+ caption_style = {
+ "class": LEGACY_DFXP_DEFAULT_STYLE_ID,
+ "region": LEGACY_DFXP_DEFAULT_REGION_ID,
+ }
p = self._recreate_p_tag(caption, caption_style, dfxp)
div.append(p)
@@ -151,29 +153,29 @@ def _force_language(self, force, langs):
return langs[-1]
def _recreate_region_tag(self, region_id, styling, dfxp):
- dfxp_region = dfxp.new_tag('region')
- dfxp_region.attrs.update({'xml:id': region_id})
+ dfxp_region = dfxp.new_tag("region")
+ dfxp_region.attrs.update({"xml:id": region_id})
attributes = self._recreate_style(styling, dfxp)
dfxp_region.attrs.update(attributes)
- new_tag = dfxp.new_tag('region')
- new_tag.attrs.update({'xml:id': region_id})
+ new_tag = dfxp.new_tag("region")
+ new_tag.attrs.update({"xml:id": region_id})
if dfxp_region != new_tag:
- dfxp.find('layout').append(dfxp_region)
+ dfxp.find("layout").append(dfxp_region)
return dfxp
def _recreate_styling_tag(self, style, content, dfxp):
- dfxp_style = dfxp.new_tag('style')
- dfxp_style.attrs.update({'xml:id': style})
+ dfxp_style = dfxp.new_tag("style")
+ dfxp_style.attrs.update({"xml:id": style})
attributes = self._recreate_style(content, dfxp)
dfxp_style.attrs.update(attributes)
- new_tag = dfxp.new_tag('style')
- new_tag.attrs.update({'xml:id': style})
+ new_tag = dfxp.new_tag("style")
+ new_tag.attrs.update({"xml:id": style})
if dfxp_style != new_tag:
- dfxp.find('styling').append(dfxp_style)
+ dfxp.find("styling").append(dfxp_style)
return dfxp
@@ -184,21 +186,21 @@ def _recreate_p_tag(self, caption, caption_style, dfxp):
p.string = self._recreate_text(caption, dfxp)
if dfxp.find("style", {"xml:id": "p"}):
- p['style'] = 'p'
+ p["style"] = "p"
p.attrs.update(self._recreate_style(caption_style, dfxp))
return p
def _recreate_text(self, caption, dfxp):
- line = ''
+ line = ""
for node in caption.nodes:
if node.type_ == CaptionNode.TEXT:
- line += escape(node.content) + ' '
+ line += escape(node.content) + " "
elif node.type_ == CaptionNode.BREAK:
- line = line.rstrip() + '
- cdcd
-
+ cdcd
+
ghgh
+
mnmn
+
opop
+
qrqr
+
stst
+
yzyz
+
0101
+
2323
+
4545
- abcd
-
- abcd
+ abcdabcd
\n '
+ line = line.rstrip() + "
\n "
elif node.type_ == CaptionNode.STYLE:
- line = self._recreate_span(
- line, node, dfxp, caption_set, caption, lang)
+ line = self._recreate_span(line, node, dfxp, caption_set, caption, lang)
return line.rstrip()
- def _recreate_span(self, line, node, dfxp, caption_set=None, caption=None,
- lang=None):
+ def _recreate_span(
+ self, line, node, dfxp, caption_set=None, caption=None, lang=None
+ ):
# TODO - This method seriously has to go away!
# Because of the CaptionNode.STYLE nodes, tree-like structures are
# are really hard to build, and proper xml elements can't be added.
# We are left with creating tags manually, which is hard to understand
# and harder to maintain
if node.start:
- styles = ''
+ styles = ""
content_with_style = _recreate_style(node.content, dfxp)
for style, value in list(content_with_style.items()):
styles += f' {style}="{value}"'
if node.layout_info:
- region_id, region_attribs = (
- self.region_creator.get_positioning_info(
- lang, caption_set, caption, node
- ))
+ region_id, region_attribs = self.region_creator.get_positioning_info(
+ lang, caption_set, caption, node
+ )
styles += f' region="{region_id}"'
if self.write_inline_positioning:
- styles += ' ' + ' '.join(
- [
- f'{k_}="{v_}"'
- for k_, v_ in list(region_attribs.items())
- ]
+ styles += " " + " ".join(
+ [f'{k_}="{v_}"' for k_, v_ in list(region_attribs.items())]
)
if styles:
if self.open_span:
- line = line.rstrip() + ' '
- line += f''
+ line = line.rstrip() + " "
+ line += f""
self.open_span = True
elif self.open_span:
- line = line.rstrip() + ' '
+ line = line.rstrip() + " "
self.open_span = False
return line
@@ -518,13 +543,21 @@ class LayoutAwareDFXPParser(BeautifulSoup):
features we support, it was easier to use pre-order and it seems to have
been enough. It should be clarified whether this is ok or not.
"""
+
# A lot of elements will have no positioning info. Use this flyweight
# to save memory
NO_POSITIONING_INFO = None
- def __init__(self, markup="", features="html.parser", builder=None,
- parse_only=None, from_encoding=None,
- read_invalid_positioning=False, **kwargs):
+ def __init__(
+ self,
+ markup="",
+ features="html.parser",
+ builder=None,
+ parse_only=None,
+ from_encoding=None,
+ read_invalid_positioning=False,
+ **kwargs,
+ ):
"""The `features` param determines the parser to be used. The parsers
are usually html parsers, some more forgiving than others, and as such
they do stuff very differently especially for xml files. We chose this
@@ -558,12 +591,11 @@ def __init__(self, markup="", features="html.parser", builder=None,
# Work around for lack of '&apos;' support in html.parser
markup = markup.replace("'", "'")
- super().__init__(
- markup, features, builder, parse_only, from_encoding, **kwargs)
+ super().__init__(markup, features, builder, parse_only, from_encoding, **kwargs)
self.read_invalid_positioning = read_invalid_positioning
- for div in self.find_all('div'):
+ for div in self.find_all("div"):
self._pre_order_visit(div)
def _pre_order_visit(self, element, inherit_from=None):
@@ -585,8 +617,7 @@ def _pre_order_visit(self, element, inherit_from=None):
# TODO - this looks highly cachable. If it turns out too much
# memory is being taken up by the caption set, cache this with a
# WeakValueDict
- layout_info = (
- self._extract_positioning_information(region_id, element))
+ layout_info = self._extract_positioning_information(region_id, element)
element.layout_info = layout_info
for child in element.contents:
self._pre_order_visit(child, inherit_from=layout_info)
@@ -597,7 +628,7 @@ def _get_region_from_ancestors(element):
region_id = None
parent = element.parent
while parent:
- region_id = parent.get('region')
+ region_id = parent.get("region")
if region_id:
break
parent = parent.parent
@@ -616,9 +647,7 @@ def _get_region_from_descendants(element):
return None
region_id = None
- child_region_ids = {
- child.get('region') for child in element.findChildren()
- }
+ child_region_ids = {child.get("region") for child in element.findChildren()}
if len(child_region_ids) > 1:
raise LookupError
if len(child_region_ids) == 1:
@@ -641,8 +670,8 @@ def _determine_region_id(cls, element):
# element could be a NavigableString. Those are dumb.
region_id = None
- if hasattr(element, 'get'):
- region_id = element.get('region')
+ if hasattr(element, "get"):
+ region_id = element.get("region")
if not region_id:
region_id = cls._get_region_from_ancestors(element)
@@ -669,7 +698,7 @@ def _extract_positioning_information(self, region_id, element):
region_tag = None
if region_id is not None:
- region_tag = self.find('region', {'xml:id': region_id})
+ region_tag = self.find("region", {"xml:id": region_id})
region_scraper = self._get_layout_info_scraper_class()(self, region_tag)
@@ -707,13 +736,12 @@ def __init__(self, document, region=None):
:param region: the region tag
"""
self.region = region
- self._styling_section = document.findChild('styling')
+ self._styling_section = document.findChild("styling")
if region:
- self.region_styles = self._get_style_sources(
- self._styling_section, region)
+ self.region_styles = self._get_style_sources(self._styling_section, region)
else:
self.region_styles = []
- self.root_element = document.find('tt')
+ self.root_element = document.find("tt")
@classmethod
def _get_style_sources(cls, styling_section, element):
@@ -736,7 +764,7 @@ def _get_style_sources(cls, styling_section, element):
styling
"""
# If we're analyzing a NavigableString, just quit
- if not hasattr(element, 'findAll'):
+ if not hasattr(element, "findAll"):
return ()
nested_styles = []
@@ -747,19 +775,19 @@ def _get_style_sources(cls, styling_section, element):
# if the parent is a
\n '
+ line = line.rstrip() + "
\n "
elif node.type_ == CaptionNode.STYLE:
line = self._recreate_span(line, node, dfxp)
@@ -207,7 +209,7 @@ def _recreate_text(self, caption, dfxp):
def _recreate_span(self, line, node, dfxp):
if node.start:
- styles = ''
+ styles = ""
content_with_style = self._recreate_style(node.content, dfxp)
for style, value in list(content_with_style.items()):
@@ -215,12 +217,12 @@ def _recreate_span(self, line, node, dfxp):
if styles:
if self.open_span:
- line = line.rstrip() + ' '
- line += f''
+ line = line.rstrip() + " "
+ line += f""
self.open_span = True
elif self.open_span:
- line = line.rstrip() + ' '
+ line = line.rstrip() + " "
self.open_span = False
return line
@@ -228,23 +230,23 @@ def _recreate_span(self, line, node, dfxp):
def _recreate_style(self, content, dfxp):
dfxp_style = {}
- if 'region' in content:
- if dfxp.find('region', {'xml:id': content['region']}):
- dfxp_style['region'] = content['region']
- if 'class' in content:
- if dfxp.find("style", {"xml:id": content['class']}):
- dfxp_style['style'] = content['class']
- if 'text-align' in content:
- dfxp_style['tts:textAlign'] = content['text-align']
- if 'italics' in content:
- dfxp_style['tts:fontStyle'] = 'italic'
- if 'font-family' in content:
- dfxp_style['tts:fontFamily'] = content['font-family']
- if 'font-size' in content:
- dfxp_style['tts:fontSize'] = content['font-size']
- if 'color' in content:
- dfxp_style['tts:color'] = content['color']
- if 'display-align' in content:
- dfxp_style['tts:displayAlign'] = content['display-align']
+ if "region" in content:
+ if dfxp.find("region", {"xml:id": content["region"]}):
+ dfxp_style["region"] = content["region"]
+ if "class" in content:
+ if dfxp.find("style", {"xml:id": content["class"]}):
+ dfxp_style["style"] = content["class"]
+ if "text-align" in content:
+ dfxp_style["tts:textAlign"] = content["text-align"]
+ if "italics" in content:
+ dfxp_style["tts:fontStyle"] = "italic"
+ if "font-family" in content:
+ dfxp_style["tts:fontFamily"] = content["font-family"]
+ if "font-size" in content:
+ dfxp_style["tts:fontSize"] = content["font-size"]
+ if "color" in content:
+ dfxp_style["tts:color"] = content["color"]
+ if "display-align" in content:
+ dfxp_style["tts:displayAlign"] = content["display-align"]
return dfxp_style
diff --git a/pycaption/exceptions.py b/pycaption/exceptions.py
index 0474c05d..7afcf2f3 100644
--- a/pycaption/exceptions.py
+++ b/pycaption/exceptions.py
@@ -2,8 +2,9 @@ class CaptionReadError(Exception):
"""
Generic error raised when the reading of the caption file failed.
"""
+
def __str__(self):
- return f'{self.__class__.__name__}({self.args[0]})'
+ return f"{self.__class__.__name__}({self.args[0]})"
class CaptionReadNoCaptions(CaptionReadError):
diff --git a/pycaption/geometry.py b/pycaption/geometry.py
index f5cc8b07..b6375ac4 100644
--- a/pycaption/geometry.py
+++ b/pycaption/geometry.py
@@ -10,7 +10,7 @@
import re
from enum import Enum
-from .exceptions import RelativizationError, CaptionReadSyntaxError
+from .exceptions import CaptionReadSyntaxError, RelativizationError
class UnitEnum(Enum):
@@ -22,11 +22,12 @@ class UnitEnum(Enum):
if unit == UnitEnum.CELL :
...
"""
- PIXEL = 'px'
- EM = 'em'
- PERCENT = '%'
- CELL = 'c'
- PT = 'pt'
+
+ PIXEL = "px"
+ EM = "em"
+ PERCENT = "%"
+ CELL = "c"
+ PT = "pt"
class VerticalAlignmentEnum(Enum):
@@ -37,18 +38,20 @@ class VerticalAlignmentEnum(Enum):
if alignment == VerticalAlignmentEnum.BOTTOM:
...
"""
- TOP = 'top'
- CENTER = 'center'
- BOTTOM = 'bottom'
+
+ TOP = "top"
+ CENTER = "center"
+ BOTTOM = "bottom"
class HorizontalAlignmentEnum(Enum):
"""Enumeration object specifying the horizontal alignment preferences"""
- LEFT = 'left'
- CENTER = 'center'
- RIGHT = 'right'
- START = 'start'
- END = 'end'
+
+ LEFT = "left"
+ CENTER = "center"
+ RIGHT = "right"
+ START = "start"
+ END = "end"
class Alignment:
@@ -63,11 +66,7 @@ def __init__(self, horizontal, vertical):
self.vertical = vertical
def __hash__(self):
- return hash(
- hash(self.horizontal) * 83
- + hash(self.vertical) * 89
- + 97
- )
+ return hash(hash(self.horizontal) * 83 + hash(self.vertical) * 89 + 97)
def __eq__(self, other):
return (
@@ -85,27 +84,26 @@ def serialized(self):
return self.horizontal, self.vertical
@classmethod
- def from_horizontal_and_vertical_align(cls, text_align=None,
- display_align=None):
+ def from_horizontal_and_vertical_align(cls, text_align=None, display_align=None):
horizontal_obj = None
vertical_obj = None
- if text_align == 'left':
+ if text_align == "left":
horizontal_obj = HorizontalAlignmentEnum.LEFT
- if text_align == 'start':
+ if text_align == "start":
horizontal_obj = HorizontalAlignmentEnum.START
- if text_align == 'center':
+ if text_align == "center":
horizontal_obj = HorizontalAlignmentEnum.CENTER
- if text_align == 'right':
+ if text_align == "right":
horizontal_obj = HorizontalAlignmentEnum.RIGHT
- if text_align == 'end':
+ if text_align == "end":
horizontal_obj = HorizontalAlignmentEnum.END
- if display_align == 'before':
+ if display_align == "before":
vertical_obj = VerticalAlignmentEnum.TOP
- if display_align == 'center':
+ if display_align == "center":
vertical_obj = VerticalAlignmentEnum.CENTER
- if display_align == 'after':
+ if display_align == "after":
vertical_obj = VerticalAlignmentEnum.BOTTOM
if not any([horizontal_obj, vertical_obj]):
@@ -125,7 +123,7 @@ def from_xml_attribute(cls, attribute):
:type attribute: str
"""
- horizontal, vertical = attribute.split(' ')
+ horizontal, vertical = attribute.split(" ")
horizontal = Size.from_string(horizontal)
vertical = Size.from_string(vertical)
@@ -146,8 +144,8 @@ def __init__(self, horizontal, vertical):
"""
for parameter in [horizontal, vertical]:
if not isinstance(parameter, Size):
- raise ValueError("Stretch must be initialized with two valid "
- "Size objects.")
+ raise ValueError("Stretch must be initialized with two valid "
+ "Size objects.")
self.horizontal = horizontal
self.vertical = vertical
@@ -158,18 +156,17 @@ def is_measured_in(self, measure_unit):
:return: True/False
"""
return (
- self.horizontal.unit == measure_unit
- and self.vertical.unit == measure_unit
+ self.horizontal.unit == measure_unit and self.vertical.unit == measure_unit
)
def __repr__(self):
- return f'
\n '
+ line = line.rstrip() + "
\n "
elif node.type_ == CaptionNode.STYLE:
line = self._recreate_line_style(line, node)
@@ -580,28 +582,28 @@ def _recreate_text(self, caption):
def _recreate_line_style(self, line, node):
if node.start:
if self.open_span:
- line = line.rstrip() + ' '
+ line = line.rstrip() + " "
line = self._recreate_span(line, node.content)
else:
if self.open_span:
- line = line.rstrip() + ' '
+ line = line.rstrip() + " "
self.open_span = False
return line
def _recreate_span(self, line, content):
- style = ''
- klass = ''
- if 'class' in content:
+ style = ""
+ klass = ""
+ if "class" in content:
klass += f' class="{content["class"]}"'
for attr, value in list(self._recreate_style(content).items()):
- style += f'{attr}:{value};'
+ style += f"{attr}:{value};"
if style or klass:
if style:
style = f' style="{style}"'
- line += f''
+ line += f""
self.open_span = True
return line
@@ -614,12 +616,12 @@ def _recreate_style(self, rules):
for key, value in list(rules.items()):
# Recreate original CSS rules from internal style
- if key == 'italics' and value is True:
- sami_style['font-style'] = 'italic'
- elif key == 'bold' and value is True:
- sami_style['font-weight'] = 'bold'
- elif key == 'underline' and value is True:
- sami_style['text-decoration'] = 'underline'
+ if key == "italics" and value is True:
+ sami_style["font-style"] = "italic"
+ elif key == "bold" and value is True:
+ sami_style["font-weight"] = "bold"
+ elif key == "underline" and value is True:
+ sami_style["text-decoration"] = "underline"
else:
sami_style[key] = value
@@ -637,14 +639,14 @@ def _encode(self, s):
class SAMIParser(HTMLParser):
def __init__(self, *args, **kw):
HTMLParser.__init__(self, *args, **kw)
- self.sami = ''
- self.line = ''
+ self.sami = ""
+ self.line = ""
self.styles = {}
self.queue = deque()
self.langs = set()
- self.last_element = ''
+ self.last_element = ""
self.name2codepoint = name2codepoint.copy()
- self.name2codepoint['apos'] = 0x0027
+ self.name2codepoint["apos"] = 0x0027
self.convert_charrefs = False
def handle_starttag(self, tag, attrs):
@@ -656,20 +658,20 @@ def handle_starttag(self, tag, attrs):
self.last_element = tag
# treat divs as spans
- if tag == 'div':
- tag = 'span'
+ if tag == "div":
+ tag = "span"
# figure out the caption language of P tags
- if tag == 'p':
+ if tag == "p":
lang = self._find_lang(attrs)
# if no language detected, set it as the default
lang = lang or DEFAULT_LANGUAGE_CODE
- attrs.append(('lang', lang))
+ attrs.append(("lang", lang))
self.langs.add(lang)
# clean-up line breaks
- if tag == 'br':
+ if tag == "br":
self.sami += "
"
# add tag to queue
else:
@@ -687,11 +689,11 @@ def handle_starttag(self, tag, attrs):
# override the parser's handling of endtags
def handle_endtag(self, tag):
# treat divs as spans
- if tag == 'div':
- tag = 'span'
+ if tag == "div":
+ tag = "span"
# handle incorrectly formatted sync/p tags
- if tag in ['p', 'sync'] and tag == self.last_element:
+ if tag in ["p", "sync"] and tag == self.last_element:
return
# close off tags in LIFO order, if matching starting tag in queue
@@ -700,18 +702,18 @@ def handle_endtag(self, tag):
self.sami += f"{closing_tag}>"
def handle_entityref(self, name):
- if name in ['gt', 'lt']:
- self.sami += f'&{name};'
+ if name in ["gt", "lt"]:
+ self.sami += f"&{name};"
else:
try:
self.sami += chr(self.name2codepoint[name])
except (KeyError, ValueError):
- self.sami += f'&{name}'
+ self.sami += f"&{name}"
- self.last_element = ''
+ self.last_element = ""
def handle_charref(self, name):
- if name[0] == 'x':
+ if name[0] == "x":
self.sami += chr(int(name[1:], 16))
else:
self.sami += chr(int(name))
@@ -719,7 +721,7 @@ def handle_charref(self, name):
# override the parser's handling of data
def handle_data(self, data):
self.sami += data
- self.last_element = ''
+ self.last_element = ""
# override the parser's feed function
def feed(self, data):
@@ -727,28 +729,27 @@ def feed(self, data):
:param data: Raw SAMI unicode string
:returns: tuple (str, dict, set)
"""
- no_cc = 'no closed captioning available'
+ no_cc = "no closed captioning available"
- if '")
- style = BeautifulSoup(data[:index], "lxml").find('style')
+ style = BeautifulSoup(data[:index], "lxml").find("style")
if style and style.contents:
- self.styles = self._css_parse(' '.join(style.contents))
+ self.styles = self._css_parse(" ".join(style.contents))
else:
self.styles = {}
# fix erroneous italics tags
- data = data.replace('', '')
+ data = data.replace("", "")
# fix awkward tags found in some SAMIs
- data = data.replace(';>', '>')
+ data = data.replace(";>", ">")
HTMLParser.feed(self, data)
# close any tags that remain in the queue
@@ -770,22 +771,22 @@ def _css_parse(self, css):
for rule in sheet:
new_style = {}
selector = rule.selectorText.lower()
- if selector[0] in ['#', '.']:
+ if selector[0] in ["#", "."]:
selector = selector[1:]
# keep any style attributes that are needed
for prop in rule.style:
- if prop.name == 'color':
+ if prop.name == "color":
try:
cv = cssutils_css.ColorValue(prop.value)
except SyntaxErr:
raise CaptionReadSyntaxError(
f"Invalid color value: {prop.value}. Check for "
f"missing # before hex values or misspelled color "
- f"values.")
+ f"values."
+ )
# Code for RGB to hex conversion comes from
# http://bit.ly/1kwfBnQ
- new_style['color'] = (f'#{cv.red:02x}{cv.green:02x}'
- f'{cv.blue:02x}')
+ new_style["color"] = f"#{cv.red:02x}{cv.green:02x}" f"{cv.blue:02x}"
else:
new_style[prop.name] = prop.value
if new_style:
@@ -796,12 +797,12 @@ def _css_parse(self, css):
def _find_lang(self, attrs):
for attr, value in attrs:
# if lang is an attribute of the tag
- if attr.lower() == 'lang':
+ if attr.lower() == "lang":
return value[:2]
# if the P tag has a class, try and find the language
- if attr.lower() == 'class':
+ if attr.lower() == "class":
try:
- return self.styles[value.lower()]['lang']
+ return self.styles[value.lower()]["lang"]
except KeyError:
pass
diff --git a/pycaption/scc/__init__.py b/pycaption/scc/__init__.py
index 5846f551..4ec8bbcd 100644
--- a/pycaption/scc/__init__.py
+++ b/pycaption/scc/__init__.py
@@ -314,6 +314,8 @@ def _flush_implicit_buffers(self, old_key=None, *args):
if not self.buffer.is_empty():
self.caption_stash.create_and_store(self.buffer, self.time)
self.buffer = self.node_creator_factory.new_creator()
+ # Reset positioning state for new caption boundary
+ self.node_creator_factory.position_tracker.reset_for_new_caption()
def _translate_line(self, line):
# ignore blank lines
@@ -367,13 +369,13 @@ def _handle_double_command(self, word):
# doubled special characters and doubled extended characters
# with only one member of each pair being displayed.
- doubled_types = (word != "94a1" and word in COMMANDS) or _is_pac_command(word) or word in SPECIAL_CHARS
+ doubled_types = (
+ (word != "94a1" and word in COMMANDS)
+ or _is_pac_command(word)
+ or word in SPECIAL_CHARS
+ )
if self.double_starter:
- doubled_types = (
- doubled_types
- or word in EXTENDED_CHARS
- or word == "94a1"
- )
+ doubled_types = doubled_types or word in EXTENDED_CHARS or word == "94a1"
if word in CUE_STARTING_COMMAND and word != self.last_command:
self.double_starter = False
@@ -426,6 +428,8 @@ def _translate_command(self, word, next_command=None):
if not self.buffer.is_empty():
self.caption_stash.create_and_store(self.buffer, self.time)
self.buffer = self.node_creator_factory.new_creator()
+ # Reset positioning state for new caption boundary
+ self.node_creator_factory.position_tracker.reset_for_new_caption()
self.time = self.time_translator.get_time()
@@ -445,6 +449,8 @@ def _translate_command(self, word, next_command=None):
if not self.buffer.is_empty():
self.caption_stash.create_and_store(self.buffer, self.time)
self.buffer = self.node_creator_factory.new_creator()
+ # Reset positioning state for new caption boundary
+ self.node_creator_factory.position_tracker.reset_for_new_caption()
# set rows to empty, configure start time for caption
self.roll_rows = []
@@ -453,6 +459,8 @@ def _translate_command(self, word, next_command=None):
# clear pop_on buffer
elif word == "94ae":
self.buffer = self.node_creator_factory.new_creator()
+ # Reset positioning state for new caption boundary
+ self.node_creator_factory.position_tracker.reset_for_new_caption()
# display pop_on buffer [End Of Caption]
elif word == "942f":
@@ -465,6 +473,8 @@ def _translate_command(self, word, next_command=None):
cue = PopOnCue(buffer=deepcopy(self.buffer), start=self.time, end=0)
self.pop_ons_queue.appendleft(cue)
self.buffer = self.node_creator_factory.new_creator()
+ # Reset positioning state for new caption boundary
+ self.node_creator_factory.position_tracker.reset_for_new_caption()
# roll up captions [Carriage Return]
elif word == "94ad":
@@ -522,6 +532,8 @@ def _roll_up(self):
# convert buffer and empty
self.caption_stash.create_and_store(self.buffer, self.time)
self.buffer = self.node_creator_factory.new_creator()
+ # Reset positioning state for new caption boundary
+ self.node_creator_factory.position_tracker.reset_for_new_caption()
# configure time
self.time = self.time_translator.get_time()
diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py
index 1bd5bf9f..addcab1f 100644
--- a/pycaption/scc/specialized_collections.py
+++ b/pycaption/scc/specialized_collections.py
@@ -340,11 +340,12 @@ def add_chars(self, *chars):
node = _InstructionNode(position=current_position)
self._collection.append(node)
- # handle a simple line break
+ # handle line break(s) - may be multiple for multi-row jumps
if self._position_tracer.is_linebreak_required():
- self._collection.append(
- _InstructionNode.create_break(position=current_position)
- )
+ for _ in range(self._position_tracer._breaks_required):
+ self._collection.append(
+ _InstructionNode.create_break(position=current_position)
+ )
self._position_tracer.acknowledge_linebreak_consumed()
node = _InstructionNode.create_text(current_position)
self._collection.append(node)
@@ -412,9 +413,10 @@ def interpret_command(self, command, next_command=None):
# it should open italic tag
# if break is required, break then add style tag
if self._position_tracer.is_linebreak_required():
- self._collection.append(
- _InstructionNode.create_break(position=current_position)
- )
+ for _ in range(self._position_tracer._breaks_required):
+ self._collection.append(
+ _InstructionNode.create_break(position=current_position)
+ )
self._position_tracer.acknowledge_linebreak_consumed()
self._collection.append(
_InstructionNode.create_italics_style(current_position)
@@ -433,9 +435,10 @@ def interpret_command(self, command, next_command=None):
)
self.last_style = "italics off"
if self._position_tracer.is_linebreak_required():
- self._collection.append(
- _InstructionNode.create_break(position=current_position)
- )
+ for _ in range(self._position_tracer._breaks_required):
+ self._collection.append(
+ _InstructionNode.create_break(position=current_position)
+ )
self._position_tracer.acknowledge_linebreak_consumed()
# handle mid-row codes that follows a text node
@@ -446,7 +449,7 @@ def interpret_command(self, command, next_command=None):
prev_text_node = self.get_previous_text_node()
prev_node_is_break = prev_text_node is not None and any(
x.is_explicit_break()
- for x in self._collection[self._collection.index(prev_text_node):]
+ for x in self._collection[self._collection.index(prev_text_node) :]
)
if (
command in MID_ROW_CODES
diff --git a/pycaption/scc/state_machines.py b/pycaption/scc/state_machines.py
index de5a4e87..74ebcbee 100644
--- a/pycaption/scc/state_machines.py
+++ b/pycaption/scc/state_machines.py
@@ -12,7 +12,7 @@ def __init__(self, positioning=None):
:type positioning: tuple[int]
"""
self._positions = [positioning]
- self._break_required = False
+ self._breaks_required = 0
self._repositioning_required = False
# Since the actual column is not applied when encountering a line break
# this attribute is used to store it and determine by comparison if the
@@ -24,6 +24,10 @@ def update_positioning(self, positioning):
to as to be able to tell if it was a trivial change (a simple line
break) or not.
+ Strategy:
+ - Small jumps (1-3 rows): Use line breaks to preserve visual spacing
+ - Large jumps (4+ rows): Use repositioning (creates new cue)
+
:type positioning: tuple[int]
:param positioning: a tuple (row, col)
"""
@@ -36,17 +40,35 @@ def update_positioning(self, positioning):
return
row, col = current
- if self._break_required:
+ if self._breaks_required:
col = self._last_column
new_row, new_col = positioning
is_tab_offset = new_row == row and col + 1 <= new_col <= col + 3
- # One line below will be treated as line break, not repositioning
- if new_row == row + 1:
- self._positions.append((new_row, col))
- self._break_required = True
- self._last_column = new_col
+
+ # Threshold for when to use breaks vs repositioning
+ # Jumps of 4+ rows will trigger repositioning instead of adding breaks
+ MAX_BREAKS_THRESHOLD = 3
+
+ # Handle row jumps
+ if new_row > row:
+ row_diff = new_row - row
+
+ # Small jumps (1-3 rows): Use line breaks to preserve visual spacing
+ if row_diff <= MAX_BREAKS_THRESHOLD:
+ self._positions.append((new_row, col))
+ # Add breaks equal to row difference
+ # Row N -> N+1: 1 break
+ # Row N -> N+2: 2 breaks (preserves 1 blank line)
+ # Row N -> N+3: 3 breaks (preserves 2 blank lines)
+ self._breaks_required = row_diff
+ self._last_column = new_col
+ # Large jumps (4+ rows): Use repositioning (new cue)
+ else:
+ # Reset position - this triggers repositioning
+ self._positions = [positioning]
+ self._repositioning_required = True
# Tab offsets after line breaks will be ignored to avoid repositioning
- elif self._break_required and is_tab_offset:
+ elif self._breaks_required and is_tab_offset:
return
# force not to reposition on the same coordinates
elif positioning == current:
@@ -87,11 +109,24 @@ def is_linebreak_required(self):
"""If the current position is simply one line below the previous.
:rtype: bool
"""
- return self._break_required
+ return self._breaks_required > 0
def acknowledge_linebreak_consumed(self):
"""Call to acknowledge that the line required was consumed"""
- self._break_required = False
+ self._breaks_required = 0
+
+ def reset_for_new_caption(self):
+ """Reset positioning state for a new caption boundary (e.g., EOC).
+
+ This ensures that breaks and repositioning state from the previous
+ caption do not bleed into the new caption. The position list is reset
+ to allow the next caption to set its position independently.
+ """
+ self._breaks_required = 0
+ self._repositioning_required = False
+ self._last_column = None
+ # Reset positions to None so the next PAC sets position fresh
+ self._positions = [None]
class DefaultProvidingPositionTracker(_PositioningTracker):
diff --git a/pycaption/scc/translator.py b/pycaption/scc/translator.py
index aba7f8a2..b2951ed1 100644
--- a/pycaption/scc/translator.py
+++ b/pycaption/scc/translator.py
@@ -1,7 +1,7 @@
from pycaption.scc.constants import ALL_CHARACTERS, COMMAND_LABELS
-def translate_scc(scc_content, brackets='[]'):
+def translate_scc(scc_content, brackets="[]"):
"""
Replaces hexadecimal words with their meaning
@@ -16,7 +16,7 @@ def translate_scc(scc_content, brackets='[]'):
:return: Translated SCC captions
:rtype: str
"""
- opening_bracket, closing_bracket = brackets if brackets else ('', '')
+ opening_bracket, closing_bracket = brackets if brackets else ("", "")
scc_elements = set(scc_content.split())
for elem in scc_elements:
name = COMMAND_LABELS.get(elem, ALL_CHARACTERS.get(elem))
@@ -28,5 +28,6 @@ def translate_scc(scc_content, brackets='[]'):
name = f"{char1}{char2}"
if name:
scc_content = scc_content.replace(
- elem, f"{opening_bracket}{name}{closing_bracket}")
+ elem, f"{opening_bracket}{name}{closing_bracket}"
+ )
return scc_content
diff --git a/pycaption/srt.py b/pycaption/srt.py
index 3351bad0..7b9bd389 100644
--- a/pycaption/srt.py
+++ b/pycaption/srt.py
@@ -1,22 +1,20 @@
from copy import deepcopy
-from .base import (
- BaseReader, BaseWriter, CaptionSet, CaptionList, Caption, CaptionNode,
-)
+from .base import BaseReader, BaseWriter, Caption, CaptionList, CaptionNode, CaptionSet
from .exceptions import CaptionReadNoCaptions, InvalidInputError
class SRTReader(BaseReader):
def detect(self, content):
lines = content.splitlines()
- if lines[0].isdigit() and '-->' in lines[1]:
+ if lines[0].isdigit() and "-->" in lines[1]:
return True
else:
return False
- def read(self, content, lang='en-US'):
+ def read(self, content, lang="en-US"):
if not isinstance(content, str):
- raise InvalidInputError('The content is not a unicode string.')
+ raise InvalidInputError("The content is not a unicode string.")
lines = content.splitlines()
start_line = 0
@@ -28,15 +26,15 @@ def read(self, content, lang='en-US'):
end_line = self._find_text_line(start_line, lines)
- timing = lines[start_line + 1].split('-->')
- start = self._srttomicro(timing[0].strip(' \r\n'))
- end = self._srttomicro(timing[1].strip(' \r\n'))
+ timing = lines[start_line + 1].split("-->")
+ start = self._srttomicro(timing[0].strip(" \r\n"))
+ end = self._srttomicro(timing[1].strip(" \r\n"))
nodes = []
- for line in lines[start_line + 2:end_line - 1]:
+ for line in lines[start_line + 2 : end_line - 1]:
# skip extra blank lines
- if not nodes or line != '':
+ if not nodes or line != "":
nodes.append(CaptionNode.create_text(line))
nodes.append(CaptionNode.create_break())
@@ -56,14 +54,16 @@ def read(self, content, lang='en-US'):
return caption_set
def _srttomicro(self, stamp):
- timesplit = stamp.split(':')
- if ',' not in timesplit[2]:
- timesplit[2] += ',000'
- secsplit = timesplit[2].split(',')
- microseconds = (int(timesplit[0]) * 3600000000
- + int(timesplit[1]) * 60000000
- + int(secsplit[0]) * 1000000
- + int(secsplit[1]) * 1000)
+ timesplit = stamp.split(":")
+ if "," not in timesplit[2]:
+ timesplit[2] += ",000"
+ secsplit = timesplit[2].split(",")
+ microseconds = (
+ int(timesplit[0]) * 3600000000
+ + int(timesplit[1]) * 60000000
+ + int(secsplit[0]) * 1000000
+ + int(secsplit[1]) * 1000
+ )
return microseconds
@@ -89,11 +89,9 @@ def write(self, caption_set):
srt_captions = []
for lang in caption_set.get_languages():
- srt_captions.append(
- self._recreate_lang(caption_set.get_captions(lang))
- )
+ srt_captions.append(self._recreate_lang(caption_set.get_captions(lang)))
- caption_content = 'MULTI-LANGUAGE SRT\n'.join(srt_captions)
+ caption_content = "MULTI-LANGUAGE SRT\n".join(srt_captions)
return caption_content
def _recreate_lang(self, captions):
@@ -106,30 +104,35 @@ def _recreate_lang(self, captions):
for caption in captions[1:]:
# Merge if the timestamp is the same as last caption
if (caption.start, caption.end) == (
- merged_captions[-1].start, merged_captions[-1].end):
+ merged_captions[-1].start,
+ merged_captions[-1].end,
+ ):
merged_captions[-1] = Caption(
start=caption.start,
end=caption.end,
- nodes=(merged_captions[-1].nodes
- + [CaptionNode.create_break()]
- + caption.nodes))
+ nodes=(
+ merged_captions[-1].nodes
+ + [CaptionNode.create_break()]
+ + caption.nodes
+ ),
+ )
else:
# Different timestamp, end of merging, append new caption
merged_captions.append(caption)
captions = merged_captions
- srt = ''
+ srt = ""
count = 1
for caption in captions:
- srt += f'{count}\n'
+ srt += f"{count}\n"
- start = caption.format_start(msec_separator=',')
- end = caption.format_end(msec_separator=',')
+ start = caption.format_start(msec_separator=",")
+ end = caption.format_end(msec_separator=",")
- srt += f'{start[:12]} --> {end[:12]}\n'
+ srt += f"{start[:12]} --> {end[:12]}\n"
- new_content = ''
+ new_content = ""
for node in caption.nodes:
new_content = self._recreate_line(new_content, node)
@@ -143,8 +146,8 @@ def _recreate_lang(self, captions):
def _recreate_line(self, srt, line):
if line.type_ == CaptionNode.TEXT:
- return srt + f'{line.content} '
+ return srt + f"{line.content} "
elif line.type_ == CaptionNode.BREAK:
- return srt + '\n'
+ return srt + "\n"
else:
return srt
diff --git a/pycaption/utils.py b/pycaption/utils.py
index e8ff6fbc..0be4346f 100644
--- a/pycaption/utils.py
+++ b/pycaption/utils.py
@@ -4,7 +4,7 @@ def is_leaf(element):
considered a leaf if it is either NavigableString or the "br" tag
:param element: A BeautifulSoup tag or NavigableString
"""
- name = getattr(element, 'name', None)
- if not name or name == 'br':
+ name = getattr(element, "name", None)
+ if not name or name == "br":
return True
return False
diff --git a/pycaption/webvtt.py b/pycaption/webvtt.py
index c3573b29..4953e465 100644
--- a/pycaption/webvtt.py
+++ b/pycaption/webvtt.py
@@ -77,7 +77,6 @@ def _parse(self, lines):
found_timing = False
for i, line in enumerate(lines):
-
if "-->" in line:
found_timing = True
timing_line = i
diff --git a/tests/conftest.py b/tests/conftest.py
index acb97edb..d1d7d5ef 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,104 +1,170 @@
from tests.fixtures.dfxp import ( # noqa: F401
- sample_dfxp, sample_dfxp_with_inline_style, sample_dfxp_with_defined_style,
- sample_dfxp_with_inherited_style, sample_dfxp_without_region_and_style,
- sample_dfxp_with_positioning, sample_dfxp_with_relativized_positioning,
- sample_dfxp_empty, sample_dfxp_syntax_error,
+ dfxp_style_region_align_conflict,
+ dfxp_with_concurrent_captions,
+ sample_dfxp,
+ sample_dfxp_default_styling_p_tags,
+ sample_dfxp_empty,
+ sample_dfxp_empty_cue,
+ sample_dfxp_empty_cue_output,
+ sample_dfxp_empty_paragraph,
+ sample_dfxp_for_legacy_writer_input,
+ sample_dfxp_for_legacy_writer_output,
+ sample_dfxp_from_sami_with_bad_span_align,
+ sample_dfxp_from_sami_with_lang_margins,
+ sample_dfxp_from_sami_with_margins,
sample_dfxp_from_sami_with_positioning,
- sample_dfxp_long_cue, sample_dfxp_long_cue_fit_to_screen,
- sample_dfxp_from_sami_with_margins, sample_dfxp_from_sami_with_lang_margins,
- sample_dfxp_from_sami_with_span, sample_dfxp_from_sami_with_bad_span_align,
+ sample_dfxp_from_sami_with_span,
+ sample_dfxp_from_scc_output,
+ sample_dfxp_incorrect_time_format,
sample_dfxp_invalid_but_supported_positioning_input,
sample_dfxp_invalid_but_supported_positioning_output,
- sample_dfxp_multiple_regions_input, sample_dfxp_multiple_regions_output,
- sample_dfxp_to_render_with_only_default_positioning_input,
- sample_dfxp_output, sample_dfxp_style_tag_with_no_xml_id_input,
- sample_dfxp_style_tag_with_no_xml_id_output, sample_dfxp_from_scc_output,
- sample_dfxp_with_properly_closing_spans_output,
- sample_dfxp_for_legacy_writer_input, sample_dfxp_for_legacy_writer_output,
- sample_dfxp_with_templated_style, sample_dfxp_with_escaped_apostrophe,
- sample_dfxp_with_alternative_timing_formats, sample_dfxp_empty_paragraph,
- sample_dfxp_only_spaces_paragraph, sample_dfxp_incorrect_time_format,
- sample_dfxp_missing_begin, sample_dfxp_missing_end_and_dur,
- sample_dfxp_with_frame_timing, sample_dfxp_empty_cue,
- sample_dfxp_empty_cue_output, sample_dfxp_default_styling_p_tags,
sample_dfxp_invalid_positioning_value_template,
+ sample_dfxp_long_cue,
+ sample_dfxp_long_cue_fit_to_screen,
+ sample_dfxp_missing_begin,
+ sample_dfxp_missing_end_and_dur,
sample_dfxp_multiple_captions_with_the_same_timing,
- sample_dfxp_with_ampersand_character, sample_dfxp_with_nested_spans,
- dfxp_style_region_align_conflict, dfxp_with_concurrent_captions,
+ sample_dfxp_multiple_regions_input,
+ sample_dfxp_multiple_regions_output,
+ sample_dfxp_only_spaces_paragraph,
+ sample_dfxp_output,
+ sample_dfxp_style_tag_with_no_xml_id_input,
+ sample_dfxp_style_tag_with_no_xml_id_output,
+ sample_dfxp_syntax_error,
+ sample_dfxp_to_render_with_only_default_positioning_input,
+ sample_dfxp_with_alternative_timing_formats,
+ sample_dfxp_with_ampersand_character,
+ sample_dfxp_with_defined_style,
+ sample_dfxp_with_escaped_apostrophe,
+ sample_dfxp_with_frame_timing,
+ sample_dfxp_with_inherited_style,
+ sample_dfxp_with_inline_style,
+ sample_dfxp_with_nested_spans,
+ sample_dfxp_with_positioning,
+ sample_dfxp_with_properly_closing_spans_output,
+ sample_dfxp_with_relativized_positioning,
+ sample_dfxp_with_templated_style,
+ sample_dfxp_without_region_and_style,
)
-from tests.fixtures.microdvd import ( # noqa: F401
- sample_microdvd, sample_microdvd_2,
- sample_microdvd_invalid_format, missing_fps_sample_microdvd,
- sample_microdvd_empty, sample_microdvd_empty_cue_output,
+from tests.fixtures.microdvd import missing_fps_sample_microdvd # noqa: F401
+from tests.fixtures.microdvd import (
+ sample_microdvd,
+ sample_microdvd_2,
+ sample_microdvd_empty,
+ sample_microdvd_empty_cue_output,
+ sample_microdvd_invalid_format,
)
-from tests.fixtures.sami import ( # noqa: F401
- sample_sami, sample_sami_with_style_tags,
- sample_sami_with_css_inline_style, sample_sami_with_css_id_style,
- sample_sami_empty, sample_sami_syntax_error,
- sample_sami_double_br, sample_sami_partial_margins,
- sample_sami_partial_margins_relativized, sample_sami_lang_margin,
- sample_sami_with_span, sample_sami_with_bad_span_align,
- sample_sami_with_bad_div_align, sample_sami_with_p_align,
- sample_sami_with_p_and_span_align, sample_sami_with_multiple_span_aligns,
- sample_sami_no_lang, sample_sami_with_lang, sample_sami_with_multi_lang,
- sample_sami_with_multiple_p, sample_sami_empty_cue_output,
- sample_sami_with_invalid_inline_style,
- sample_sami_including_hexadecimal_charref,
+from tests.fixtures.sami import sample_sami # noqa: F401
+from tests.fixtures.sami import (
+ sample_sami_double_br,
+ sample_sami_empty,
+ sample_sami_empty_cue_output,
+ sample_sami_from_dfxp_with_nested_spans,
sample_sami_including_decimal_charref,
- sample_sami_including_html5_entityref, sample_sami_with_unclosed_tag,
- sample_sami_with_inline_lang, sample_sami_from_dfxp_with_nested_spans,
- sample_sami_with_separate_multi_lang, sample_sami_missing_start
+ sample_sami_including_hexadecimal_charref,
+ sample_sami_including_html5_entityref,
+ sample_sami_lang_margin,
+ sample_sami_missing_start,
+ sample_sami_no_lang,
+ sample_sami_partial_margins,
+ sample_sami_partial_margins_relativized,
+ sample_sami_syntax_error,
+ sample_sami_with_bad_div_align,
+ sample_sami_with_bad_span_align,
+ sample_sami_with_css_id_style,
+ sample_sami_with_css_inline_style,
+ sample_sami_with_inline_lang,
+ sample_sami_with_invalid_inline_style,
+ sample_sami_with_lang,
+ sample_sami_with_multi_lang,
+ sample_sami_with_multiple_p,
+ sample_sami_with_multiple_span_aligns,
+ sample_sami_with_p_align,
+ sample_sami_with_p_and_span_align,
+ sample_sami_with_separate_multi_lang,
+ sample_sami_with_span,
+ sample_sami_with_style_tags,
+ sample_sami_with_unclosed_tag,
)
from tests.fixtures.scc import ( # noqa: F401
+ sample_no_positioning_at_all_scc,
sample_scc_created_dfxp_with_wrongly_closing_spans,
- scc_that_generates_webvtt_with_proper_newlines,
- sample_scc_produces_captions_with_start_and_end_time_the_same,
- sample_scc_pop_on, sample_scc_multiple_positioning, sample_scc_with_italics,
- sample_scc_empty, sample_scc_roll_up_ru2, sample_scc_roll_up_ru3,
- sample_no_positioning_at_all_scc, sample_scc_with_line_too_long,
- sample_scc_no_explicit_end_to_last_caption, sample_scc_flashing_cue,
- sample_scc_eoc_first_command, sample_scc_with_extended_characters,
- sample_scc_with_ampersand_character, sample_scc_multiple_formats,
- sample_scc_duplicate_tab_offset, sample_scc_duplicate_special_characters,
- sample_scc_tab_offset, sample_scc_with_unknown_commands,
- sample_scc_special_and_extended_characters, sample_scc_mid_row_before_text_pop,
- sample_scc_mid_row_before_text_roll, sample_scc_mid_row_before_text_paint,
+ sample_scc_duplicate_special_characters,
+ sample_scc_duplicate_tab_offset,
+ sample_scc_empty,
+ sample_scc_eoc_first_command,
+ sample_scc_flashing_cue,
+ sample_scc_mid_row_before_text_paint,
+ sample_scc_mid_row_before_text_pop,
+ sample_scc_mid_row_before_text_roll,
+ sample_scc_mid_row_following_text_no_text_before_italics_off_paint,
sample_scc_mid_row_following_text_no_text_before_italics_off_pop,
sample_scc_mid_row_following_text_no_text_before_italics_off_roll,
- sample_scc_mid_row_following_text_no_text_before_italics_off_paint,
+ sample_scc_mid_row_following_text_no_text_before_italics_on_paint,
sample_scc_mid_row_following_text_no_text_before_italics_on_pop,
sample_scc_mid_row_following_text_no_text_before_italics_on_roll,
- sample_scc_mid_row_following_text_no_text_before_italics_on_paint,
+ sample_scc_mid_row_with_space_before_paint,
sample_scc_mid_row_with_space_before_pop,
sample_scc_mid_row_with_space_before_roll,
- sample_scc_mid_row_with_space_before_paint,
+ sample_scc_multiple_formats,
+ sample_scc_multiple_positioning,
+ sample_scc_no_explicit_end_to_last_caption,
+ sample_scc_pop_on,
+ sample_scc_produces_captions_with_start_and_end_time_the_same,
+ sample_scc_roll_up_ru2,
+ sample_scc_roll_up_ru3,
+ sample_scc_special_and_extended_characters,
+ sample_scc_tab_offset,
+ sample_scc_with_ampersand_character,
+ sample_scc_with_extended_characters,
+ sample_scc_with_italics,
+ sample_scc_with_line_too_long,
+ sample_scc_with_spaces_at_eol_paint,
sample_scc_with_spaces_at_eol_pop,
sample_scc_with_spaces_at_eol_roll,
- sample_scc_with_spaces_at_eol_paint,
+ sample_scc_with_unknown_commands,
+ scc_that_generates_webvtt_with_proper_newlines,
)
-from tests.fixtures.srt import ( # noqa: F401
- sample_srt, sample_srt_ascii, sample_srt_numeric, sample_srt_empty,
- sample_srt_blank_lines, sample_srt_trailing_blanks,
- samples_srt_same_time, sample_srt_empty_cue_output,
+from tests.fixtures.srt import (
+ sample_srt,
+ sample_srt_ascii, # noqa: F401
+ sample_srt_blank_lines,
+ sample_srt_empty,
+ sample_srt_empty_cue_output,
+ sample_srt_numeric,
sample_srt_timestamps_without_microseconds,
+ sample_srt_trailing_blanks,
+ samples_srt_same_time,
)
from tests.fixtures.translated_scc import ( # noqa: F401
- sample_translated_scc_custom_brackets, sample_translated_scc_success,
- sample_translated_scc_commands_not_found, sample_translated_scc_no_brackets,
- sample_translated_scc_special_and_extended_characters
+ sample_translated_scc_commands_not_found,
+ sample_translated_scc_custom_brackets,
+ sample_translated_scc_no_brackets,
+ sample_translated_scc_special_and_extended_characters,
+ sample_translated_scc_success,
)
from tests.fixtures.webvtt import ( # noqa: F401
- sample_webvtt, sample_webvtt_from_dfxp, sample_webvtt_from_sami,
- sample_webvtt_from_sami_with_style, sample_webvtt_from_sami_with_id_style,
+ sample_webvtt,
+ sample_webvtt_2,
+ sample_webvtt_double_br,
+ sample_webvtt_empty,
+ sample_webvtt_empty_cue,
+ sample_webvtt_empty_cue_output,
+ sample_webvtt_from_dfxp,
+ sample_webvtt_from_dfxp_with_positioning_and_style,
sample_webvtt_from_dfxp_with_style,
+ sample_webvtt_from_sami,
+ sample_webvtt_from_sami_with_id_style,
+ sample_webvtt_from_sami_with_style,
+ sample_webvtt_from_scc_properly_writes_newlines_output,
+ sample_webvtt_from_srt,
+ sample_webvtt_from_webvtt,
sample_webvtt_keeps_positioning,
- sample_webvtt_from_dfxp_with_positioning_and_style,
- sample_webvtt_from_srt, sample_webvtt_from_webvtt,
- sample_webvtt_2, sample_webvtt_empty, sample_webvtt_double_br,
- sample_webvtt_output_long_cue, webvtt_from_dfxp_with_conflicting_align,
+ sample_webvtt_last_cue_zero_start,
+ sample_webvtt_multi_lang_de,
+ sample_webvtt_multi_lang_en,
+ sample_webvtt_output_long_cue,
+ sample_webvtt_timestamps,
sample_webvtt_with_cue_settings,
- sample_webvtt_from_scc_properly_writes_newlines_output,
- sample_webvtt_last_cue_zero_start, sample_webvtt_empty_cue,
- sample_webvtt_multi_lang_en, sample_webvtt_multi_lang_de,
- sample_webvtt_empty_cue_output, sample_webvtt_timestamps
+ webvtt_from_dfxp_with_conflicting_align,
)
diff --git a/tests/fixtures/dfxp.py b/tests/fixtures/dfxp.py
index 864f3e7a..277252e2 100644
--- a/tests/fixtures/dfxp.py
+++ b/tests/fixtures/dfxp.py
@@ -915,16 +915,15 @@ def sample_dfxp_from_scc_output():
efef
ijij
klkl
uvuv
wxwx
6767
8989
@@ -977,7 +974,7 @@ def sample_dfxp_from_scc_output():
"""
-@pytest.fixture(scope="session")
+@pytest.fixture()
def sample_dfxp_with_properly_closing_spans_output():
return """\
@@ -992,7 +989,7 @@ def sample_dfxp_with_properly_closing_spans_output():
cc
-