diff --git a/CHANGELOG.md b/CHANGELOG.md index 46457bd68..ebdae243b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ ### Maintenance and upkeep improvements -- enhancement dep-chain: directly depend on bleach[css], instead of pulling in tinycss2. [#2166](https://github.com/jupyter/nbconvert/pull/2166) ([@xiacunshun](https://github.com/xiacunshun)) +- enhancement dep-chain: directly depend on bleach\[css\], instead of pulling in tinycss2. [#2166](https://github.com/jupyter/nbconvert/pull/2166) ([@xiacunshun](https://github.com/xiacunshun)) - chore: update pre-commit hooks [#2146](https://github.com/jupyter/nbconvert/pull/2146) ([@pre-commit-ci](https://github.com/pre-commit-ci)) ### Contributors to this release @@ -1613,6 +1613,7 @@ raw template {%- endblock in_prompt -%} """ + exporter_attr = AttrExporter() output_attr, _ = exporter_attr.from_notebook_node(nb) assert "raw template" in output_attr diff --git a/nbconvert/exporters/html.py b/nbconvert/exporters/html.py index d63699c0d..56724b062 100644 --- a/nbconvert/exporters/html.py +++ b/nbconvert/exporters/html.py @@ -15,6 +15,7 @@ from bs4 import BeautifulSoup from jupyter_core.paths import jupyter_path from traitlets import Bool, Unicode, default, validate +from traitlets import Dict as TraitletsDict from traitlets.config import Config if tuple(int(x) for x in jinja2.__version__.split(".")[:3]) < (3, 0, 0): @@ -183,6 +184,14 @@ def _template_name_default(self): output_mimetype = "text/html" + lexer_options = TraitletsDict( + {}, + help=( + "Options to be passed to the pygments lexer for highlighting markdown code blocks. " + "See https://pygments.org/docs/lexers/#available-lexers for available options." + ), + ).tag(config=True) + @property def default_config(self): c = Config( @@ -239,6 +248,7 @@ def markdown2html(self, context, source): path=path, anchor_link_text=self.anchor_link_text, exclude_anchor_links=self.exclude_anchor_links, + **self.lexer_options, ) return MarkdownWithMath(renderer=renderer).render(source) diff --git a/nbconvert/filters/highlight.py b/nbconvert/filters/highlight.py index d29a4759a..3a85bd17b 100644 --- a/nbconvert/filters/highlight.py +++ b/nbconvert/filters/highlight.py @@ -136,7 +136,9 @@ def __call__(self, source, language=None, metadata=None, strip_verbatim=False): return latex -def _pygments_highlight(source, output_formatter, language="ipython", metadata=None): +def _pygments_highlight( + source, output_formatter, language="ipython", metadata=None, **lexer_options +): """ Return a syntax-highlighted version of the input source @@ -149,6 +151,10 @@ def _pygments_highlight(source, output_formatter, language="ipython", metadata=N language to highlight the syntax of metadata : NotebookNode cell metadata metadata of the cell to highlight + lexer_options : dict + Options to pass to the pygments lexer. See + https://pygments.org/docs/lexers/#available-lexers for more information about + valid lexer options """ from pygments import highlight from pygments.lexers import get_lexer_by_name @@ -179,7 +185,7 @@ def _pygments_highlight(source, output_formatter, language="ipython", metadata=N if lexer is None: try: - lexer = get_lexer_by_name(language, stripall=True) + lexer = get_lexer_by_name(language, **lexer_options) except ClassNotFound: warn("No lexer found for language %r. Treating as plain text." % language, stacklevel=2) from pygments.lexers.special import TextLexer diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py index fb8828167..ea46bba09 100644 --- a/nbconvert/filters/markdown_mistune.py +++ b/nbconvert/filters/markdown_mistune.py @@ -293,6 +293,7 @@ def __init__( anchor_link_text: str = "¶", path: str = "", attachments: Optional[Dict[str, Dict[str, str]]] = None, + **lexer_options, ): """Initialize the renderer.""" super().__init__(escape, allow_harmful_protocols) @@ -300,6 +301,7 @@ def __init__( self.exclude_anchor_links = exclude_anchor_links self.anchor_link_text = anchor_link_text self.path = path + self.lexer_options = lexer_options if attachments is not None: self.attachments = attachments else: @@ -317,7 +319,7 @@ def block_code(self, code: str, info: Optional[str] = None) -> str: try: if info.strip().split(None, 1): lang = info.strip().split(maxsplit=1)[0] - lexer = get_lexer_by_name(lang, stripall=True) + lexer = get_lexer_by_name(lang, **self.lexer_options) except ClassNotFound: code = f"{lang}\n{code}" lang = None diff --git a/pyproject.toml b/pyproject.toml index 9d8768138..06fd056f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -224,7 +224,6 @@ ignore = [ "T201", # `print` found "RUF012", # Mutable class attributes should be annotated "UP031", # Use format specifiers instead of percent format - ] unfixable = [ "T201", # Don't touch print statements @@ -253,6 +252,7 @@ unfixable = [ "nbconvert/__init__.py" = ["F401", "F403"] # PLR2004 Magic value used in comparison "nbconvert/filters/ansi.py" = ["PLR2004"] +"tests/exporters/test_html.py" = ["RUF001"] [tool.interrogate] ignore-init-module=true diff --git a/tests/exporters/test_html.py b/tests/exporters/test_html.py index 810de47ec..f1bfb69b8 100644 --- a/tests/exporters/test_html.py +++ b/tests/exporters/test_html.py @@ -5,6 +5,7 @@ import re +import pytest from nbformat import v4 from traitlets.config import Config @@ -262,3 +263,50 @@ def test_language_code_error(self): (output, resources) = exporter.from_filename(self._get_notebook()) assert '' in output + + +@pytest.mark.parametrize( + ("lexer_options"), + [ + {"stripall": True}, + {"stripall": False}, + {}, + ], +) +def test_syntax_highlight_leading_whitespace(lexer_options): + """Test that syntax highlight doesn't strip leading spaces.""" + nb = v4.reads(r""" +{ + "cells": [ + { + "cell_type": "markdown", + "id": "29da71a9-ae40-4098-8c3b-31a98e79fc12", + "metadata": {}, + "source": [ + "```APL\n", + " 1+2×⍳3\n", + "3 5 7\n", + "```\n", + "\n", + "```\n", + " 1+2×⍳3\n", + "3 5 7\n", + "```" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} + """) + output, _ = HTMLExporter(lexer_options=lexer_options).from_notebook_node(nb) + # Check that the second code block has the leading spaces + assert "
1+2×⍳3\n3 5 7\n
" in output
+
+ if lexer_options.get("stripall"):
+ # Check that the APL-formatted code block has leading spaces stripped
+ assert ' ' not in output
+ else:
+ # Check that the APL-formatted code block has the leading spaces
+ assert ' ' in output