From b2136e1088111801472da8b8b85a9af51bee584a Mon Sep 17 00:00:00 2001 From: Pavel Kvach Date: Tue, 16 Apr 2024 07:59:55 +0300 Subject: [PATCH] utils/html: Remove the hard-coded list of allowed elements and attributes These changes provide full control over the management of "allowed-elements" and "allowed-attributes" through the configuration file. Fixes https://github.com/isso-comments/isso/issues/751 --- contrib/isso-dev.cfg | 5 +++-- docs/docs/reference/server-config.rst | 26 ++++---------------------- isso/isso.cfg | 16 +++++++--------- isso/tests/test_html.py | 8 ++++---- isso/utils/html.py | 18 +++++------------- 5 files changed, 23 insertions(+), 50 deletions(-) diff --git a/contrib/isso-dev.cfg b/contrib/isso-dev.cfg index d3820ee5..c5968d75 100644 --- a/contrib/isso-dev.cfg +++ b/contrib/isso-dev.cfg @@ -37,8 +37,9 @@ reply-to-self = true [markup] options = autolink, fenced-code, no-intra-emphasis, strikethrough, superscript flags = -allowed-elements = -allowed-attributes = +allowed-elements = a, p, hr, br, ol, ul, li, pre, code, blockquote, del, ins, + strong, em, h1, h2, h3, h4, h5, h6, sub, sup, table, thead, tbody, th, td +allowed-attributes = align, href [hash] salt = Eech7co8Ohloopo9Ol6baimi diff --git a/docs/docs/reference/server-config.rst b/docs/docs/reference/server-config.rst index ea3c91d8..07007b98 100644 --- a/docs/docs/reference/server-config.rst +++ b/docs/docs/reference/server-config.rst @@ -428,37 +428,19 @@ flags .. versionadded:: 0.12.4 allowed-elements - **Additional** HTML tags to allow in the generated output, comma-separated. - - By default, only ``a``, ``blockquote``, ``br``, ``code``, ``del``, ``em``, - ``h1``, ``h2``, ``h3``, ``h4``, ``h5``, ``h6``, ``hr``, ``ins``, ``li``, - ``ol``, ``p``, ``pre``, ``strong``, ``table``, ``tbody``, ``td``, ``th``, - ``thead`` and ``ul`` are allowed. + HTML tags to allow in the generated output, comma-separated. For a more detailed explanation, see :doc:`/docs/reference/markdown-config`. - .. 
warning:: - - This option (together with ``allowed-attributes``) is frequently - misunderstood. Setting e.g. this list to only ``a, blockquote`` will - mean that ``br, code, del, ...`` and all other default allowed tags are - still allowed. You can only add *additional* elements here. - - It is planned to change this behavior, see - `this issue <https://github.com/isso-comments/isso/issues/751>`_. - - Default: (empty) + Default: ``a, p, hr, br, ol, ul, li, pre, code, blockquote, del, ins, strong, em, h1, h2, h3, h4, h5, h6, sub, sup, table, thead, tbody, th, td`` allowed-attributes - **Additional** HTML attributes (independent from elements) to allow in the + HTML attributes (independent from elements) to allow in the generated output, comma-separated. - By default, only ``align`` and ``href`` are allowed (same caveats as for - ``allowed-elements`` above apply) - For a more detailed explanation, see :doc:`/docs/reference/markdown-config`. - Default: (empty) + Default: ``align, href`` .. note:: To allow images in comments, you need to add ``allowed-elements = img`` and *also* ``allowed-attributes = src``. diff --git a/isso/isso.cfg b/isso/isso.cfg index fec23829..a9cd7677 100644 --- a/isso/isso.cfg +++ b/isso/isso.cfg @@ -211,15 +211,13 @@ options = autolink, fenced-code, no-intra-emphasis, strikethrough, superscript # Per Misaka's defaults, no flags are set. flags = -# Additional HTML tags to allow in the generated output, comma-separated. By -# default, only a, blockquote, br, code, del, em, h1, h2, h3, h4, h5, h6, hr, -# ins, li, ol, p, pre, strong, table, tbody, td, th, thead and ul are allowed. -allowed-elements = - -# Additional HTML attributes (independent from elements) to allow in the -# generated output, comma-separated. By default, only align and href are -# allowed. -allowed-attributes = +# HTML tags to allow in the generated output, comma-separated.
+allowed-elements = a, p, hr, br, ol, ul, li, pre, code, blockquote, del, ins, + strong, em, h1, h2, h3, h4, h5, h6, sub, sup, table, thead, tbody, th, td + +# HTML attributes (independent from elements) to allow in the generated output, +# comma-separated. +allowed-attributes = align, href [hash] diff --git a/isso/tests/test_html.py b/isso/tests/test_html.py index 5d5f87a3..fb37f7c1 100644 --- a/isso/tests/test_html.py +++ b/isso/tests/test_html.py @@ -60,7 +60,7 @@ def test_github_flavoured_markdown(self): """) def test_sanitizer(self): - sanitizer = html.Sanitizer(elements=[], attributes=[]) + sanitizer = html.Sanitizer(elements=["p", "a", "code"], attributes=["href"]) examples = [ ('Look: ', 'Look: '), ('Ha', @@ -94,8 +94,8 @@ def test_render(self): "markup": { "options": "autolink", "flags": "", - "allowed-elements": "", - "allowed-attributes": "" + "allowed-elements": "a, p", + "allowed-attributes": "href" } }) renderer = html.Markup(conf.section("markup")).render @@ -109,7 +109,7 @@ def test_sanitized_render_extensions(self): "markup": { "options": "no_intra_emphasis", # Deliberately snake_case "flags": "", - "allowed-elements": "", + "allowed-elements": "p", "allowed-attributes": "" } }) diff --git a/isso/utils/html.py b/isso/utils/html.py index c1aafad1..a9921305 100644 --- a/isso/utils/html.py +++ b/isso/utils/html.py @@ -17,25 +17,17 @@ def allow_attribute_class(tag, name, value): return name == "class" and bool(Sanitizer.code_language_pattern.match(value)) def __init__(self, elements, attributes): - # attributes found in Sundown's HTML serializer [1] - # - except for <img> tag, because images are not generated anyways.
- # - sub and sup added - # - # [1] https://github.com/vmg/sundown/blob/master/html/html.c - self.elements = ["a", "p", "hr", "br", "ol", "ul", "li", - "pre", "code", "blockquote", - "del", "ins", "strong", "em", - "h1", "h2", "h3", "h4", "h5", "h6", "sub", "sup", - "table", "thead", "tbody", "th", "td"] + elements + self.elements = elements # allowed attributes for tags self.attributes = { - "table": ["align"], - "a": ["href"], - "code": Sanitizer.allow_attribute_class, "*": attributes } + # If "code" elements are allowed, allow "language-*" CSS classes for syntax highlighting + if "code" in self.elements: + self.attributes["code"] = Sanitizer.allow_attribute_class + def sanitize(self, text): clean_html = bleach.clean(text, tags=self.elements, attributes=self.attributes, strip=True)