jupyter · peytondmurray · Jan 6, 2025 · Jan 6, 2025 · Jan 23, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,7 +20,7 @@
 
 ### Maintenance and upkeep improvements
 
-- enhancement dep-chain: directly depend on bleach[css], instead of pulling in tinycss2. [#2166](https://github.com/jupyter/nbconvert/pull/2166) ([@xiacunshun](https://github.com/xiacunshun))
+- enhancement dep-chain: directly depend on bleach\[css\], instead of pulling in tinycss2. [#2166](https://github.com/jupyter/nbconvert/pull/2166) ([@xiacunshun](https://github.com/xiacunshun))
 - chore: update pre-commit hooks [#2146](https://github.com/jupyter/nbconvert/pull/2146) ([@pre-commit-ci](https://github.com/pre-commit-ci))
 
 ### Contributors to this release
@@ -1613,6 +1613,7 @@ raw template
 {%- endblock in_prompt -%}
     """
 
+
 exporter_attr = AttrExporter()
 output_attr, _ = exporter_attr.from_notebook_node(nb)
 assert "raw template" in output_attr

diff --git a/nbconvert/exporters/html.py b/nbconvert/exporters/html.py
@@ -15,6 +15,7 @@
 from bs4 import BeautifulSoup
 from jupyter_core.paths import jupyter_path
 from traitlets import Bool, Unicode, default, validate
+from traitlets import Dict as TraitletsDict
 from traitlets.config import Config
 
 if tuple(int(x) for x in jinja2.__version__.split(".")[:3]) < (3, 0, 0):
@@ -183,6 +184,14 @@ def _template_name_default(self):
 
     output_mimetype = "text/html"
 
+    lexer_options = TraitletsDict(
+        {},
+        help=(
+            "Options to be passed to the pygments lexer for highlighting markdown code blocks. "
+            "See https://pygments.org/docs/lexers/#available-lexers for available options."
+        ),
+    ).tag(config=True)
+
     @property
     def default_config(self):
         c = Config(
@@ -239,6 +248,7 @@ def markdown2html(self, context, source):
             path=path,
             anchor_link_text=self.anchor_link_text,
             exclude_anchor_links=self.exclude_anchor_links,
+            **self.lexer_options,
         )
         return MarkdownWithMath(renderer=renderer).render(source)
 

diff --git a/nbconvert/filters/highlight.py b/nbconvert/filters/highlight.py
@@ -136,7 +136,9 @@ def __call__(self, source, language=None, metadata=None, strip_verbatim=False):
         return latex
 
 
-def _pygments_highlight(source, output_formatter, language="ipython", metadata=None):
+def _pygments_highlight(
+    source, output_formatter, language="ipython", metadata=None, **lexer_options
+):
     """
     Return a syntax-highlighted version of the input source
 
@@ -149,6 +151,10 @@ def _pygments_highlight(source, output_formatter, language="ipython", metadata=N
         language to highlight the syntax of
     metadata : NotebookNode cell metadata
         metadata of the cell to highlight
+    **lexer_options : dict
+        Options to pass to the pygments lexer. See
+        https://pygments.org/docs/lexers/#available-lexers for more information about
+        valid lexer options.
     """
     from pygments import highlight
     from pygments.lexers import get_lexer_by_name
@@ -179,7 +185,7 @@ def _pygments_highlight(source, output_formatter, language="ipython", metadata=N
 
     if lexer is None:
         try:
-            lexer = get_lexer_by_name(language, stripall=True)
+            lexer = get_lexer_by_name(language, **lexer_options)
         except ClassNotFound:
             warn("No lexer found for language %r. Treating as plain text." % language, stacklevel=2)
             from pygments.lexers.special import TextLexer

diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
@@ -293,13 +293,15 @@ def __init__(
         anchor_link_text: str = "¶",
         path: str = "",
         attachments: Optional[Dict[str, Dict[str, str]]] = None,
+        **lexer_options,
     ):
         """Initialize the renderer."""
         super().__init__(escape, allow_harmful_protocols)
         self.embed_images = embed_images
         self.exclude_anchor_links = exclude_anchor_links
         self.anchor_link_text = anchor_link_text
         self.path = path
+        self.lexer_options = lexer_options
         if attachments is not None:
             self.attachments = attachments
         else:
@@ -317,7 +319,7 @@ def block_code(self, code: str, info: Optional[str] = None) -> str:
             try:
                 if info.strip().split(None, 1):
                     lang = info.strip().split(maxsplit=1)[0]
-                    lexer = get_lexer_by_name(lang, stripall=True)
+                    lexer = get_lexer_by_name(lang, **self.lexer_options)
             except ClassNotFound:
                 code = f"{lang}\n{code}"
                 lang = None

diff --git a/pyproject.toml b/pyproject.toml
@@ -224,7 +224,6 @@ ignore = [
   "T201",   # `print` found
   "RUF012", # Mutable class attributes should be annotated
   "UP031",  # Use format specifiers instead of percent format
-
 ]
 unfixable = [
   "T201",   # Don't touch print statements
@@ -253,6 +252,7 @@ unfixable = [
 "nbconvert/__init__.py" = ["F401", "F403"]
 # PLR2004 Magic value used in comparison
 "nbconvert/filters/ansi.py" = ["PLR2004"]
+"tests/exporters/test_html.py" = ["RUF001"]
 
 [tool.interrogate]
 ignore-init-module=true

diff --git a/tests/exporters/test_html.py b/tests/exporters/test_html.py
@@ -5,6 +5,7 @@
 
 import re
 
+import pytest
 from nbformat import v4
 from traitlets.config import Config
 
@@ -262,3 +263,50 @@ def test_language_code_error(self):
         (output, resources) = exporter.from_filename(self._get_notebook())
 
         assert '<html lang="en">' in output
+
+
+@pytest.mark.parametrize(
+    ("lexer_options"),
+    [
+        {"stripall": True},
+        {"stripall": False},
+        {},
+    ],
+)
+def test_syntax_highlight_leading_whitespace(lexer_options):
+    """Test that leading spaces in code blocks are kept unless ``stripall`` is set."""
+    nb = v4.reads(r"""
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "29da71a9-ae40-4098-8c3b-31a98e79fc12",
+   "metadata": {},
+   "source": [
+    "```APL\n",
+    "      1+2×⍳3\n",
+    "3 5 7\n",
+    "```\n",
+    "\n",
+    "```\n",
+    "      1+2×⍳3\n",
+    "3 5 7\n",
+    "```"
+   ]
+  }
+ ],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
+    """)
+    output, _ = HTMLExporter(lexer_options=lexer_options).from_notebook_node(nb)
+    # Check that the second code block has the leading spaces
+    assert "<pre><code>      1+2×⍳3\n3 5 7\n</code></pre>" in output
+
+    if lexer_options.get("stripall"):
+        # Check that the APL-formatted code block has leading spaces stripped
+        assert '<span class="w">      </span>' not in output
+    else:
+        # Check that the APL-formatted code block has the leading spaces
+        assert '<span class="w">      </span>' in output