Skip to content

Commit 3b05be9

Browse files
committed
More tests passing
1 parent 36949bf commit 3b05be9

File tree

3 files changed

+43
-43
lines changed

3 files changed

+43
-43
lines changed

markdown/blockprocessors.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -278,11 +278,12 @@ def run(self, parent, blocks):
278278

279279
class RawHtmlProcessor(BlockProcessor):
280280

281-
TAG_RE = re.compile(r'(^|\n)[ ]{0,3}\<(?P<tag>[^<> ]+)[^<>]*>')
281+
TAG_RE = re.compile(r'(^|\n)[ ]{0,3}<([?!].*?|(?P<tag>[^<> ]+)[^<>]*)>', re.S | re.U)
282282

283283
def test(self, parent, block):
284284
m = self.TAG_RE.search(block)
285-
return m and self.parser.md.is_block_level(m.group('tag'))
285+
# If m but no 'tag', then we have a comment, declaration, or processing instruction.
286+
return m and (self.parser.md.is_block_level(m.group('tag')) or not m.group('tag'))
286287

287288
def run(self, parent, blocks):
288289
parser = HTMLExtractor(md=self.parser.md)
@@ -292,7 +293,6 @@ def run(self, parent, blocks):
292293
break
293294
parser.close()
294295
# Insert Markdown back into blocks with raw HTML extracted.
295-
print parser.cleandoc
296296
parts = ''.join(parser.cleandoc).split('\n\n')
297297
parts.reverse()
298298
for block in parts:

markdown/htmlparser.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -98,12 +98,27 @@ def handle_data(self, data):
9898
else:
9999
self.cleandoc.append(data)
100100

101-
def handle_comment(self, data):
102-
text = '<!--{}-->'.format(data)
101+
def handle_empty_tag(self, data):
102+
""" Handle empty tags (`<data>`). """
103103
line, col = self.getpos()
104104
if self.inraw:
105105
# Append this to the existing raw block
106-
self._cache.append(text)
107-
else:
106+
self._cache.append(data)
107+
elif col < 4:
108108
# Handle this as a standalone raw block
109-
self.cleandoc.append(self.md.htmlStash.store(text))
109+
self.cleandoc.append(self.md.htmlStash.store(data))
110+
else:
111+
# Presumably part of a code block.
112+
self.cleandoc.append(data)
113+
114+
def handle_comment(self, data):
115+
self.handle_empty_tag('<!--{}-->'.format(data))
116+
117+
def handle_decl(self, data):
118+
self.handle_empty_tag('<!{}>'.format(data))
119+
120+
def handle_pi(self, data):
121+
self.handle_empty_tag('<?{}>'.format(data))
122+
123+
def handle_unknown_decl(self, data):
124+
self.handle_empty_tag('<![{}]>'.format(data))

tests/test_syntax/blocks/test_html_blocks.py

Lines changed: 20 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -483,22 +483,12 @@ def test_raw_comment_one_line(self):
483483
'<!-- *foo* -->'
484484
)
485485

486-
# TODO: Decide behavior here. Python-Markdown currently outputs:
487-
#
488-
# <!-- *foo* -->
489-
# <p><em>bar</em></p>
490-
#
491-
# But the reference implementation outputs:
492-
#
493-
# <p><!-- *foo* --><em>bar</em></p>
494-
#
495-
# As the raw HTML is not alone on the line, the reference implementation
496-
# considers it inline rather than block level. The behavior defined in
497-
# the test below is from the CommonMark spec, which we don't follow.
486+
# Note: this is a change in behavior for Python-Markdown but matches the reference implementation.
487+
# Previous output was `<!-- *foo* -->\n<p><em>bar</em></p>`. Browsers render both the same.
498488
def test_raw_comment_one_line_followed_by_text(self):
499489
self.assertMarkdownRenders(
500490
'<!-- *foo* -->*bar*',
501-
'<!-- *foo* -->*bar*'
491+
'<p><!-- *foo* --><em>bar</em></p>'
502492
)
503493

504494
def test_raw_multiline_comment(self):
@@ -581,6 +571,17 @@ def test_raw_comment_nested(self):
581571
)
582572
)
583573

574+
def test_comment_in_code_block(self):
575+
self.assertMarkdownRenders(
576+
' <!-- *foo* -->',
577+
self.dedent(
578+
"""
579+
<pre><code>&lt;!-- *foo* --&gt;
580+
</code></pre>
581+
"""
582+
)
583+
)
584+
584585
def test_raw_processing_instruction_one_line(self):
585586
self.assertMarkdownRenders(
586587
"<?php echo '>';' ?>",
@@ -662,20 +663,12 @@ def test_raw_declaration_one_line(self):
662663
'<!DOCTYPE html>'
663664
)
664665

665-
# TODO: Decide correct behavior. This matches current behavior and Commonmark.
666-
# The reference implementation considers this inline not block level:
667-
#
668-
# <p><!DOCTYPE html><em>bar</em></p>
669-
#
670-
# But most implementations do this instead:
671-
#
672-
# <p>&lt;!DOCTYPE html&gt;<em>bar</em></p>
673-
#
674-
# Either makes sense, but the latter seems more correct to me.
666+
# Note: this is a change in behavior for Python-Markdown but matches the reference implementation.
667+
# Previous output was `<!DOCTYPE html>*bar*`.
675668
def test_raw_declaration_one_line_followed_by_text(self):
676669
self.assertMarkdownRenders(
677670
'<!DOCTYPE html>*bar*',
678-
'<!DOCTYPE html>*bar*'
671+
'<p><!DOCTYPE html><em>bar</em></p>'
679672
)
680673

681674
def test_raw_multiline_declaration(self):
@@ -702,20 +695,12 @@ def test_raw_cdata_one_line(self):
702695
'<![CDATA[ document.write(">"); ]]>'
703696
)
704697

705-
# TODO: Decide correct behavior. This matches current behavior and Commonmark.
706-
# The reference implementation considers this inline not block level:
707-
#
708-
# <p><![CDATA[ document.write(">"); ]]><em>bar</em></p>
709-
#
710-
# But most implementations do this instead:
711-
#
712-
# <p>&lt;[CDATA[ document.write(“&gt;”); ]]&gt;<em>bar</em></p>
713-
#
714-
# Either makes sense, but the latter seems more correct to me.
698+
# Note: this is a change in behavior for Python-Markdown but matches the reference implementation.
699+
# Previous output was `<![CDATA[ document.write(">"); ]]>*bar*`.
715700
def test_raw_cdata_one_line_followed_by_text(self):
716701
self.assertMarkdownRenders(
717702
'<![CDATA[ document.write(">"); ]]>*bar*',
718-
'<![CDATA[ document.write(">"); ]]>*bar*'
703+
'<p><![CDATA[ document.write(">"); ]]><em>bar</em></p>'
719704
)
720705

721706
def test_raw_multiline_cdata(self):

0 commit comments

Comments
 (0)