From e4499350d8a94c4c693a7e784295eff7a717ae67 Mon Sep 17 00:00:00 2001 From: Carsten Brandt Date: Tue, 9 Feb 2016 23:09:46 +0100 Subject: [PATCH] better handling of continued inline HTML in paragraphs fixes #114 --- GithubMarkdown.php | 11 ++++++++++- Markdown.php | 19 +++++++++++++------ block/HtmlTrait.php | 4 ++-- tests/markdown-data/html-block.html | 18 ++++++++++++++++++ tests/markdown-data/html-block.md | 22 ++++++++++++++++++++++ 5 files changed, 65 insertions(+), 9 deletions(-) diff --git a/GithubMarkdown.php b/GithubMarkdown.php index 3a46b33..23c9d1a 100644 --- a/GithubMarkdown.php +++ b/GithubMarkdown.php @@ -68,7 +68,6 @@ protected function consumeParagraph($lines, $current) || ltrim($line) === '' || !ctype_alpha($line[0]) && ( $this->identifyQuote($line, $lines, $i) || - $this->identifyCode($line, $lines, $i) || $this->identifyFencedCode($line, $lines, $i) || $this->identifyUl($line, $lines, $i) || $this->identifyOl($line, $lines, $i) || @@ -77,6 +76,16 @@ protected function consumeParagraph($lines, $current) || $this->identifyHeadline($line, $lines, $i)) { break; + } elseif ($this->identifyCode($line, $lines, $i)) { + // possible beginning of a code block + // but check for continued inline HTML + // e.g. some alt aligned with src attribute + if (preg_match('~<\w+([^>]+)$~s', implode("\n", $content))) { + $content[] = $line; + } else { + break; + } } else { $content[] = $line; } diff --git a/Markdown.php b/Markdown.php index df29135..32c9b0f 100644 --- a/Markdown.php +++ b/Markdown.php @@ -92,13 +92,20 @@ protected function consumeParagraph($lines, $current) break; } - if ($line !== '' && ltrim($line) !== '' && - !($line[0] === "\t" || $line[0] === " " && strncmp($line, ' ', 4) === 0) && - !$this->identifyHeadline($line, $lines, $i)) - { - $content[] = $line; - } else { + if ($line === '' || ltrim($line) === '' || $this->identifyHeadline($line, $lines, $i)) { break; + } elseif ($line[0] === "\t" || $line[0] === " " && strncmp($line, ' ', 4) === 0) { + // possible beginning of a code block + // but check for continued inline HTML + // e.g. some alt aligned with src attribute + if (preg_match('~<\w+([^>]+)$~s', implode("\n", $content))) { + $content[] = $line; + } else { + break; + } + } else { + $content[] = $line; } } $block = [ diff --git a/block/HtmlTrait.php b/block/HtmlTrait.php index 448518e..2f15135 100644 --- a/block/HtmlTrait.php +++ b/block/HtmlTrait.php @@ -146,10 +146,10 @@ protected function renderInlineHtml($block) protected function parseInlineHtml($text) { if (strpos($text, '>') !== false) { - if (preg_match('~^~', $text, $matches)) { + if (preg_match('~^~s', $text, $matches)) { // HTML tags return [['inlineHtml', $matches[0]], strlen($matches[0])]; - } elseif (preg_match('~^~', $text, $matches)) { + } elseif (preg_match('~^~s', $text, $matches)) { // HTML comments return [['inlineHtml', $matches[0]], strlen($matches[0])]; } diff --git a/tests/markdown-data/html-block.html b/tests/markdown-data/html-block.html index 49198f2..738b51b 100644 --- a/tests/markdown-data/html-block.html +++ b/tests/markdown-data/html-block.html @@ -12,6 +12,9 @@

more markdown here

< this is not an html tag

<thisisnotanhtmltag

+

but this is:

+

some alt aligned with src attribute

some inline md

some inline md

self-closing on block level:

@@ -22,3 +25,18 @@

h1

h2

+

p some alt aligned with src attribute + something

+

p some alt aligned with src attribute

+
something
+
+

p is < than 5

+
this is code
+
+

this paragraph contains a +newline

diff --git a/tests/markdown-data/html-block.md b/tests/markdown-data/html-block.md index 23ed1bc..ac8c9ae 100644 --- a/tests/markdown-data/html-block.md +++ b/tests/markdown-data/html-block.md @@ -17,6 +17,11 @@ more markdown here + some inline **md** some inline **md** @@ -35,3 +40,20 @@ something **bold**. ## h2 + +p some alt aligned with src attribute + something + +p some alt aligned with src attribute + something + +p is < than 5 + this is code + +this paragraph contains a +newline