Skip to content

Commit

Permalink
feat(checks): add reStructuredText syntax checking
Browse files Browse the repository at this point in the history
Issue #8446
  • Loading branch information
nijel committed Jan 16, 2025
1 parent 50fe3c2 commit 992700d
Show file tree
Hide file tree
Showing 8 changed files with 190 additions and 16 deletions.
1 change: 1 addition & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Not yet released.
**New features**

* :ref:`check-rst-references` check to validate reStructuredText references.
* :ref:`check-rst-syntax` check to validate reStructuredText syntax.

**Improvements**

Expand Down
15 changes: 15 additions & 0 deletions docs/user/checks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,21 @@ translation file or defined manually using ``regex`` flag:
regex:^foo|bar$
.. _check-rst-syntax:

reStructuredText syntax error
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. versionadded:: 5.10

:Summary: reStructuredText syntax error in the translation.
:Scope: translated strings
:Check class: ``weblate.checks.markup.RSTSyntaxCheck``
:Check identifier: ``rst-syntax``
:Flag to enable: ``rst-text``
:Flag to ignore: ``ignore-rst-syntax``

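Reports translations that are not valid reStructuredText, for example inline
markup with a start-string but no matching end-string. An unknown interpreted
text role is not reported when the same role is also used in the source string.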

.. _check-reused:

Expand Down
33 changes: 23 additions & 10 deletions weblate/checks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
from __future__ import annotations

import re
from typing import TYPE_CHECKING, Any, Literal
from typing import TYPE_CHECKING, Any, TypedDict

import sentry_sdk
from django.http import Http404
from django.utils.html import format_html, format_html_join
from django.utils.safestring import mark_safe
from django.utils.translation import gettext
from lxml import etree
from siphashc import siphash
Expand All @@ -28,7 +29,11 @@
from .flags import Flags
from .models import Check

MissingExtraDict = dict[Literal["missing", "extra"], list[str]]

class MissingExtraDict(TypedDict, total=False):
    """Structured check result; ``total=False`` makes every key optional."""

    # Rendered through ``get_missing_text`` by ``format_result``.
    missing: list[str]
    # Rendered through ``get_extra_text`` by ``format_result``.
    extra: list[str]
    # Rendered through ``get_errors_text`` by ``format_result``.
    errors: list[str]


class BaseCheck:
Expand Down Expand Up @@ -321,19 +326,27 @@ def get_extra_text(self, values: Iterable[str]) -> StrOrPromise:
gettext("The following format strings are extra: {}"), values
)

def get_errors_text(self, values: Iterable[str]) -> StrOrPromise:
    """Render error messages as HTML lines below a translated heading.

    The heading and every item of ``values`` are escaped by
    ``format_html_join`` and separated with ``<br />``.
    """
    lines = [gettext("The following errors were found:"), *values]
    return format_html_join(
        mark_safe("<br />"),  # noqa: S308
        "{}",
        ((line,) for line in lines),
    )

def format_string(self, string: str) -> str:
    """Return a human-readable rendering of a parsed format string.

    This implementation hands ``string`` back unchanged.
    """
    return string

def format_result(self, result: MissingExtraDict) -> Iterable[StrOrPromise]:
    """Yield one human-readable message per non-empty category in ``result``.

    Keys are read with ``.get()`` because ``MissingExtraDict`` is declared
    with ``total=False``: a result such as ``{"errors": [...]}`` carries no
    ``missing``/``extra`` keys, so subscripting them would raise ``KeyError``.
    Each category is de-duplicated through ``set()`` before it is rendered.
    """
    if missing := result.get("missing"):
        yield self.get_missing_text(self.format_string(x) for x in set(missing))
    if extra := result.get("extra"):
        yield self.get_extra_text(self.format_string(x) for x in set(extra))
    if errors := result.get("errors"):
        yield self.get_errors_text(set(errors))


class SourceCheck(BaseCheck):
Expand Down
99 changes: 93 additions & 6 deletions weblate/checks/markup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import re
from collections import defaultdict
from functools import lru_cache
from typing import TYPE_CHECKING

from django.core.exceptions import ValidationError
Expand All @@ -14,6 +15,9 @@
from django.utils.html import format_html_join
from django.utils.safestring import mark_safe
from django.utils.translation import gettext_lazy
from docutils import utils

Check failure on line 18 in weblate/checks/markup.py

View workflow job for this annotation

GitHub Actions / mypy

Library stubs not installed for "docutils"
from docutils.core import Publisher

Check failure on line 19 in weblate/checks/markup.py

View workflow job for this annotation

GitHub Actions / mypy

Library stubs not installed for "docutils.core"
from docutils.nodes import Element, system_message

Check failure on line 20 in weblate/checks/markup.py

View workflow job for this annotation

GitHub Actions / mypy

Library stubs not installed for "docutils.nodes"

from weblate.checks.base import MissingExtraDict, TargetCheck
from weblate.utils.html import (
Expand Down Expand Up @@ -80,6 +84,8 @@
":kbd:",
}

RST_ROLE_RE = re.compile(r"""Unknown interpreted text role "([^"]*)"\.""")


def strip_entities(text):
"""Strip all HTML entities (we don't care about them)."""
Expand Down Expand Up @@ -369,18 +375,21 @@ def check_single(self, source: str, target: str, unit: Unit):
return cleaned_target != target


class RSTReferencesCheck(TargetCheck):
check_id = "rst-references"
name = gettext_lazy("Inconsistent reStructuredText references")
description = gettext_lazy(
"Inconsistent reStructuredText term references in the translated message."
)
class RSTBaseCheck(TargetCheck):
    """Common base for the reStructuredText checks.

    Checks deriving from this class are disabled by default and share the
    ``rst-text`` enable string.
    """

    default_disabled = True

    def __init__(self) -> None:
        super().__init__()
        # Flag that enables the check.
        self.enable_string = "rst-text"


class RSTReferencesCheck(RSTBaseCheck):
check_id = "rst-references"
name = gettext_lazy("Inconsistent reStructuredText references")
description = gettext_lazy(
"Inconsistent reStructuredText term references in the translated message."
)

def extract_references(self, text: str) -> dict[str, str]:
return {
role
Expand Down Expand Up @@ -435,3 +444,81 @@ def check_highlight(self, source: str, unit: Unit):
return
for match in RST_REF_MATCH.finditer(source):
yield match.start(0), match.end(0), match.group(0)


@lru_cache(maxsize=512)
def validate_rst_snippet(
    snippet: str, source_tags: tuple[str, ...] | None = None
) -> tuple[list[str], list[str]]:
    """Parse ``snippet`` as reStructuredText and collect the reported problems.

    Args:
        snippet: Text to parse.
        source_tags: Role names to tolerate. An "Unknown interpreted text
            role" message naming one of them is not reported as an error.
            It has to be hashable (hence a tuple of any length) because the
            call is memoized by ``lru_cache``.

    Returns:
        ``(errors, roles)``: the collected messages, and every role name that
        triggered an "Unknown interpreted text role" message, including the
        tolerated ones.

    The returned lists are the very objects stored in the cache, so callers
    must treat them as read-only.
    """
    publisher = Publisher(settings=None)
    publisher.set_components("standalone", "restructuredtext", "pseudoxml")
    # NOTE(review): halt_level=5 is presumably above every docutils severity,
    # so problems reach the observer below instead of raising - confirm
    # against the docutils configuration reference.
    settings = publisher.get_settings(halt_level=5)
    publisher.set_io()
    document = utils.new_document(None, settings)
    # Messages are gathered by the observer; no stream output is wanted.
    document.reporter.stream = None

    errors: list[str] = []
    roles: list[str] = []

    def error_collector(data: system_message) -> None:
        """Save the error."""
        message = Element.astext(data)
        if match := RST_ROLE_RE.match(message):
            role = match.group(1)
            roles.append(role)
            if source_tags is not None and role in source_tags:
                # Skip if the role was found in the source
                return
        elif "No role entry" in message:
            # Ignore as this duplicates Unknown interpreted in our case
            return
        errors.append(message)

    document.reporter.attach_observer(error_collector)
    publisher.reader.parser.parse(snippet, document)
    return errors, roles


class RSTSyntaxCheck(RSTBaseCheck):
    """Report reStructuredText syntax errors found in the translation."""

    check_id = "rst-syntax"
    name = gettext_lazy("reStructuredText syntax error")
    description = gettext_lazy("reStructuredText syntax error in the translation.")

    # NOTE(review): mypy reports this return type as incompatible with
    # ``TargetCheck.check_single`` (declared as returning ``bool``); resolving
    # that needs the supertype signature widened, which is outside this class.
    def check_single(
        self, source: str, target: str, unit: Unit
    ) -> bool | MissingExtraDict:
        """Validate ``target`` and return its errors, or ``False`` when clean.

        The source is parsed first, only to learn which unknown roles it
        uses; those roles are then tolerated in the target.
        """
        _errors, source_tags = validate_rst_snippet(source)
        # A tuple is passed because validate_rst_snippet is memoized and
        # needs hashable arguments.
        errors, _target_tags = validate_rst_snippet(target, tuple(source_tags))

        if errors:
            return {"errors": errors}
        return False

    def get_description(self, check_obj: Check) -> StrOrPromise:
        """Describe the errors found across all plural forms of the unit.

        Falls back to the inherited description when nothing is reported.
        """
        unit = check_obj.unit

        # Explicit literal keys keep the value a well-typed MissingExtraDict;
        # a defaultdict indexed by a variable key is rejected by mypy.
        results: MissingExtraDict = {"missing": [], "extra": [], "errors": []}

        # Merge plurals
        for result in self.check_target_generator(
            unit.get_source_plurals(), unit.get_target_plurals(), unit
        ):
            if isinstance(result, dict):
                results["missing"].extend(result.get("missing", []))
                results["extra"].extend(result.get("extra", []))
                results["errors"].extend(result.get("errors", []))

        # format_result yields nothing for empty categories.
        messages: list[StrOrPromise] = list(self.format_result(results))
        if messages:
            return format_html_join(
                mark_safe("<br />"),  # noqa: S308
                "{}",
                ((message,) for message in messages),
            )
        return super().get_description(check_obj)

    def check_highlight(self, source: str, unit: Unit):
        """Yield ``(start, end, text)`` for each ``RST_REF_MATCH`` hit in ``source``.

        Nothing is yielded when ``should_skip`` is true for the unit.
        """
        if self.should_skip(unit):
            return
        for match in RST_REF_MATCH.finditer(source):
            yield match.start(0), match.end(0), match.group(0)
1 change: 1 addition & 0 deletions weblate/checks/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ class WeblateChecksConf(AppConf):
"weblate.checks.markup.URLCheck",
"weblate.checks.markup.SafeHTMLCheck",
"weblate.checks.markup.RSTReferencesCheck",
"weblate.checks.markup.RSTSyntaxCheck",
"weblate.checks.placeholders.PlaceholderCheck",
"weblate.checks.placeholders.RegexCheck",
"weblate.checks.duplicate.DuplicateCheck",
Expand Down
55 changes: 55 additions & 0 deletions weblate/checks/tests/test_markup_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
MarkdownRefLinkCheck,
MarkdownSyntaxCheck,
RSTReferencesCheck,
RSTSyntaxCheck,
SafeHTMLCheck,
URLCheck,
XMLTagsCheck,
Expand Down Expand Up @@ -466,3 +467,57 @@ def test_translatable(self) -> None:
"rst-text",
),
)


class RSTSyntaxCheckTest(CheckTestCase):
    """Tests for ``RSTSyntaxCheck``."""

    check = RSTSyntaxCheck()

    def setUp(self) -> None:
        """Define the shared fixtures.

        NOTE(review): the tuples are presumably (source, target, flags) as
        consumed by ``CheckTestCase`` - confirm against the base class.
        """
        super().setUp()
        literal = "``foo``"
        self.test_good_matching = (literal, literal, "rst-text")
        self.test_good_none = (literal, literal, "")
        self.test_good_flag = ("string", "string", "rst-text")
        self.test_failure_1 = (literal, "``foo`", "rst-text")
        self.test_failure_2 = (literal, ":ref:`foo`bar", "rst-text")
        self.test_failure_3 = (literal, ":ref:`foo bar` `", "rst-text")

    def test_roles(self) -> None:
        """A role shared with the source passes; a different one is flagged."""
        source = ":abcde:`Ctrl+Home`"
        cases = (
            (False, ":abcde:`Ctrl+Home`"),
            (True, ":defgh:`Ctrl+Home`"),
        )
        for expected, target in cases:
            self.do_test(expected, (source, target, "rst-text"))

    def test_description(self) -> None:
        """The description lists the syntax error under the heading."""
        component = Component(
            file_format="po",
            source_language=Language(code="en"),
        )
        translation = Translation(component=component, plural=Plural())
        unit = Unit(
            source=":ref:`bar`",
            target=":ref:`bar",
            extra_flags="rst-text",
            translation=translation,
        )
        self.assertHTMLEqual(
            self.check.get_description(Check(unit=unit)),
            """
The following errors were found:<br>
Inline interpreted text or phrase reference start-string without end-string.
""",
        )
1 change: 1 addition & 0 deletions weblate/settings_docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,7 @@
"weblate.checks.markup.URLCheck",
"weblate.checks.markup.SafeHTMLCheck",
"weblate.checks.markup.RSTReferencesCheck",
"weblate.checks.markup.RSTSyntaxCheck",
"weblate.checks.placeholders.PlaceholderCheck",
"weblate.checks.placeholders.RegexCheck",
"weblate.checks.duplicate.DuplicateCheck",
Expand Down
1 change: 1 addition & 0 deletions weblate/settings_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,7 @@
# "weblate.checks.markup.URLCheck",
# "weblate.checks.markup.SafeHTMLCheck",
# "weblate.checks.markup.RSTReferencesCheck",
# "weblate.checks.markup.RSTSyntaxCheck",
# "weblate.checks.placeholders.PlaceholderCheck",
# "weblate.checks.placeholders.RegexCheck",
# "weblate.checks.duplicate.DuplicateCheck",
Expand Down

0 comments on commit 992700d

Please sign in to comment.