CustomAction { get; init; }
}
\ No newline at end of file
diff --git a/src/Html2Markdown/Replacement/HtmlParser.cs b/src/Html2Markdown/Replacement/HtmlParser.cs
index b92c042c..93b1f45a 100644
--- a/src/Html2Markdown/Replacement/HtmlParser.cs
+++ b/src/Html2Markdown/Replacement/HtmlParser.cs
@@ -38,7 +38,10 @@ private static string ReplaceList(string html)
listItems.ToList().ForEach(listItem =>
{
var listPrefix = listType.Equals("ol") ? $"{++counter}. " : "* ";
- var finalList = listItem.Replace(@"", string.Empty);
+ // In multiline HTML a list item can end with a new line. In that case remove the closing tag together with the new line;
+ // otherwise the line breaks between tags would be kept and produce a double line break in the Markdown.
+ var closingTag = listItem.EndsWith($"{Environment.NewLine}") ? $"{Environment.NewLine}" : "";
+ var finalList = listItem.Replace(closingTag, string.Empty);
if (finalList.Trim().Length == 0) {
return;
@@ -48,10 +51,16 @@ private static string ReplaceList(string html)
finalList = TwoNewLines().Replace(finalList, $"{Environment.NewLine}{Environment.NewLine} ");
// indent nested lists
finalList = NestedList().Replace(finalList, "\n$1 $2");
+ // remove the indent from the first line
+ if (listItem.StartsWith(""))
+ {
+ finalList = ReplaceParagraph(finalList, true);
+ }
markdownList.Add($"{listPrefix}{finalList}");
});
- return Environment.NewLine + Environment.NewLine + markdownList.Aggregate((current, item) => current + Environment.NewLine + item) + Environment.NewLine + Environment.NewLine;
+ // If the Markdown accumulated so far already ends with a new line, append the next item directly instead of adding another one
+ return Environment.NewLine + Environment.NewLine + markdownList.Aggregate((current, item) => current.EndsWith(Environment.NewLine) ? current + item : current + Environment.NewLine + item) + Environment.NewLine + Environment.NewLine;
}
private static bool ListIsEmpty(IReadOnlyCollection listItems)
@@ -128,7 +137,7 @@ internal static string ReplaceImg(string html)
return doc.DocumentNode.OuterHtml;
}
- public static string ReplaceAnchor(string html)
+ internal static string ReplaceAnchor(string html)
{
var doc = GetHtmlDocument(html);
var nodes = doc.DocumentNode.SelectNodes("//a");
@@ -155,9 +164,7 @@ public static string ReplaceAnchor(string html)
return doc.DocumentNode.OuterHtml;
}
- public static string ReplaceCode(string html) => ReplaceCode(html, false);
-
- public static string ReplaceCode(string html, bool supportSyntaxHighlighting)
+ internal static string ReplaceCode(string html, bool supportSyntaxHighlighting)
{
var doc = GetHtmlDocument(html);
var nodes = doc.DocumentNode.SelectNodes("//code");
@@ -219,7 +226,7 @@ private static string GetSyntaxHighlightLanguage(HtmlNode node)
: classAttributeValue;
}
- public static string ReplaceBlockquote(string html)
+ internal static string ReplaceBlockquote(string html)
{
var doc = GetHtmlDocument(html);
var nodes = doc.DocumentNode.SelectNodes("//blockquote");
@@ -248,12 +255,14 @@ public static string ReplaceBlockquote(string html)
return doc.DocumentNode.OuterHtml;
}
- public static string ReplaceEntities(string html)
+ internal static string ReplaceEntities(string html)
{
return WebUtility.HtmlDecode(html);
}
- public static string ReplaceParagraph(string html)
+ internal static string ReplaceParagraph(string html) => ReplaceParagraph(html, false);
+
+ private static string ReplaceParagraph(string html, bool nestedIntoList)
{
var doc = GetHtmlDocument(html);
var nodes = doc.DocumentNode.SelectNodes("//p");
@@ -266,7 +275,12 @@ public static string ReplaceParagraph(string html)
var text = node.InnerHtml;
var markdown = Spaces().Replace(text, " ");
markdown = markdown.Replace(Environment.NewLine, " ");
- markdown = Environment.NewLine + Environment.NewLine + markdown + Environment.NewLine;
+
+ // When the paragraph is nested in a list, do not surround it with new line characters
+ var openingTag = nestedIntoList ? "" : Environment.NewLine + Environment.NewLine;
+ var closingTag = nestedIntoList ? "" : Environment.NewLine;
+
+ markdown = openingTag + markdown + closingTag;
ReplaceNode(node, markdown);
});
diff --git a/src/Html2Markdown/Scheme/CommonMark.cs b/src/Html2Markdown/Scheme/CommonMark.cs
index 8e0cc6cc..75f7374c 100644
--- a/src/Html2Markdown/Scheme/CommonMark.cs
+++ b/src/Html2Markdown/Scheme/CommonMark.cs
@@ -14,7 +14,7 @@ public class CommonMark : AbstractScheme
{
public CommonMark()
{
- AddReplacementGroup(ReplacerCollection, new TextFormattingReplacementGroup());
+ AddReplacementGroup(ReplacerCollection, new CommonMarkTextFormattingReplacementGroup());
AddReplacementGroup(ReplacerCollection, new HeadingReplacementGroup());
AddReplacementGroup(ReplacerCollection, new IllegalHtmlReplacementGroup());
AddReplacementGroup(ReplacerCollection, new CommonMarkLayoutReplacementGroup());
diff --git a/test/Html2Markdown.Test/CommonMarkSchemeConverterTest.Convert_WhenThereIsAMultilineOrderedListWithNestedParagraphsAndCodeElement_ThenReplaceWithMarkdownLists.verified.txt b/test/Html2Markdown.Test/CommonMarkSchemeConverterTest.Convert_WhenThereIsAMultilineOrderedListWithNestedParagraphsAndCodeElement_ThenReplaceWithMarkdownLists.verified.txt
new file mode 100644
index 00000000..2771a2a4
--- /dev/null
+++ b/test/Html2Markdown.Test/CommonMarkSchemeConverterTest.Convert_WhenThereIsAMultilineOrderedListWithNestedParagraphsAndCodeElement_ThenReplaceWithMarkdownLists.verified.txt
@@ -0,0 +1,6 @@
+This code is with an ordered list and paragraphs.
+
+1. Yes, this is a `code` element
+2. No :
+
+ * `Some code we are looking at`
\ No newline at end of file
diff --git a/test/Html2Markdown.Test/CommonMarkSchemeConverterTest.Convert_WhenThereIsAnOrderedListWithNestedParagraphs_ThenReplaceWithMarkdownLists.verified.txt b/test/Html2Markdown.Test/CommonMarkSchemeConverterTest.Convert_WhenThereIsAnOrderedListWithNestedParagraphs_ThenReplaceWithMarkdownLists.verified.txt
new file mode 100644
index 00000000..2771a2a4
--- /dev/null
+++ b/test/Html2Markdown.Test/CommonMarkSchemeConverterTest.Convert_WhenThereIsAnOrderedListWithNestedParagraphs_ThenReplaceWithMarkdownLists.verified.txt
@@ -0,0 +1,6 @@
+This code is with an ordered list and paragraphs.
+
+1. Yes, this is a `code` element
+2. No :
+
+ * `Some code we are looking at`
\ No newline at end of file
diff --git a/test/Html2Markdown.Test/MarkdownSchemeConverterTest.Convert_WhenThereIsAMultilineOrderedListWithNestedParagraphsAndCodeElement_ThenReplaceWithMarkdownLists.verified.txt b/test/Html2Markdown.Test/MarkdownSchemeConverterTest.Convert_WhenThereIsAMultilineOrderedListWithNestedParagraphsAndCodeElement_ThenReplaceWithMarkdownLists.verified.txt
new file mode 100644
index 00000000..2771a2a4
--- /dev/null
+++ b/test/Html2Markdown.Test/MarkdownSchemeConverterTest.Convert_WhenThereIsAMultilineOrderedListWithNestedParagraphsAndCodeElement_ThenReplaceWithMarkdownLists.verified.txt
@@ -0,0 +1,6 @@
+This code is with an ordered list and paragraphs.
+
+1. Yes, this is a `code` element
+2. No :
+
+ * `Some code we are looking at`
\ No newline at end of file
diff --git a/test/Html2Markdown.Test/MarkdownSchemeConverterTest.Convert_WhenThereIsAnOrderedListWithNestedParagraphs_ThenReplaceWithMarkdownLists.verified.txt b/test/Html2Markdown.Test/MarkdownSchemeConverterTest.Convert_WhenThereIsAnOrderedListWithNestedParagraphs_ThenReplaceWithMarkdownLists.verified.txt
new file mode 100644
index 00000000..2771a2a4
--- /dev/null
+++ b/test/Html2Markdown.Test/MarkdownSchemeConverterTest.Convert_WhenThereIsAnOrderedListWithNestedParagraphs_ThenReplaceWithMarkdownLists.verified.txt
@@ -0,0 +1,6 @@
+This code is with an ordered list and paragraphs.
+
+1. Yes, this is a `code` element
+2. No :
+
+ * `Some code we are looking at`
\ No newline at end of file
diff --git a/test/Html2Markdown.Test/MarkdownSchemeConverterTest.cs b/test/Html2Markdown.Test/MarkdownSchemeConverterTest.cs
index ba6630c5..5128aaf6 100644
--- a/test/Html2Markdown.Test/MarkdownSchemeConverterTest.cs
+++ b/test/Html2Markdown.Test/MarkdownSchemeConverterTest.cs
@@ -608,6 +608,32 @@ public Task Convert_WhenThereIsAnOrderedListWithANestedUnorderedList_ThenReplace
return CheckConversion(html);
}
+ [Test]
+ public Task Convert_WhenThereIsAnOrderedListWithNestedParagraphs_ThenReplaceWithMarkdownLists()
+ {
+ const string html = @"This code is with an ordered list and paragraphs.
Yes, this is a code
element
No :
Some code we are looking at
";
+
+ return CheckConversion(html);
+ }
+
+ [Test]
+ public Task Convert_WhenThereIsAMultilineOrderedListWithNestedParagraphsAndCodeElement_ThenReplaceWithMarkdownLists()
+ {
+ const string html = @"This code is with an ordered list and paragraphs.
+
+Yes, this is a code
element
+
+No :
+
+Some code we are looking at
+
+
+
+";
+
+ return CheckConversion(html);
+ }
+
#endregion
#region Extra HTML Removal