1# Python Markdown
2
3# A Python implementation of John Gruber's Markdown.
4
5# Documentation: https://python-markdown.github.io/
6# GitHub: https://github.com/Python-Markdown/markdown/
7# PyPI: https://pypi.org/project/Markdown/
8
9# Started by Manfred Stienstra (http://www.dwerg.net/).
10# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
11# Currently maintained by Waylan Limberg (https://github.com/waylan),
12# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
13
14# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
15# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
16# Copyright 2004 Manfred Stienstra (the original version)
17
18# License: BSD (see LICENSE.md for details).
19
20"""
21A block processor parses blocks of text and adds new elements to the ElementTree. Blocks of text,
22separated from other text by blank lines, may have a different syntax and produce a differently
23structured tree than other Markdown. Block processors excel at handling code formatting, equation
24layouts, tables, etc.
25"""
26
27from __future__ import annotations
28
29import logging
30import re
31import xml.etree.ElementTree as etree
32from typing import TYPE_CHECKING, Any
33from . import util
34from .blockparser import BlockParser
35
36if TYPE_CHECKING: # pragma: no cover
37 from markdown import Markdown
38
39logger = logging.getLogger('MARKDOWN')
40
41
def build_block_parser(md: Markdown, **kwargs: Any) -> BlockParser:
    """ Build the default block parser used by Markdown.

    A `BlockParser` is created for `md` and one instance of every built-in
    block processor is registered on it under a fixed name and priority.
    Additional keyword arguments are accepted but are not used here.
    """
    parser = BlockParser(md)
    # (processor class, registry name, priority), listed in registration order.
    default_processors = (
        (EmptyBlockProcessor, 'empty', 100),
        (ListIndentProcessor, 'indent', 90),
        (CodeBlockProcessor, 'code', 80),
        (HashHeaderProcessor, 'hashheader', 70),
        (SetextHeaderProcessor, 'setextheader', 60),
        (HRProcessor, 'hr', 50),
        (OListProcessor, 'olist', 40),
        (UListProcessor, 'ulist', 30),
        (BlockQuoteProcessor, 'quote', 20),
        (ReferenceProcessor, 'reference', 15),
        (ParagraphProcessor, 'paragraph', 10),
    )
    for processor_class, name, priority in default_processors:
        parser.blockprocessors.register(processor_class(parser), name, priority)
    return parser
57
58
class BlockProcessor:
    """ Base class for block processors.

    Each subclass will provide the methods below to work with the source and
    tree. Each processor will need to define its own `test` and `run`
    methods. The `test` method should return True or False, to indicate
    whether the current block should be processed by this processor. If the
    test passes, the parser will call the processor's `run` method.

    Attributes:
        BlockProcessor.parser (BlockParser): The `BlockParser` instance this is attached to.
        BlockProcessor.tab_length (int): The tab length set on the `Markdown` instance.

    """

    def __init__(self, parser: BlockParser):
        self.parser = parser
        # Cached from the `Markdown` instance the parser is attached to.
        self.tab_length = parser.md.tab_length

    def lastChild(self, parent: etree.Element) -> etree.Element | None:
        """ Return the last child of an `etree` element, or `None` if it has no children. """
        return parent[-1] if len(parent) else None

    def detab(self, text: str, length: int | None = None) -> tuple[str, str]:
        """ Remove a tab from the front of each line of the given text.

        Lines are consumed from the top for as long as they either start with
        `length` spaces (which are stripped) or are blank (which become empty
        strings). Processing stops at the first line that is neither.

        Keyword arguments:
            text: The text to detab.
            length: Number of leading spaces to remove. Defaults to `self.tab_length`.

        Returns:
            A tuple of the detabbed leading lines and the untouched remaining
            lines, each joined by newlines.
        """
        if length is None:
            length = self.tab_length
        indent = ' ' * length
        lines = text.split('\n')
        newtext: list[str] = []
        for line in lines:
            if line.startswith(indent):
                newtext.append(line[length:])
            elif not line.strip():
                newtext.append('')
            else:
                # First line that is neither indented nor blank ends the run.
                break
        return '\n'.join(newtext), '\n'.join(lines[len(newtext):])

    def looseDetab(self, text: str, level: int = 1) -> str:
        """ Remove a tab from front of lines but allowing dedented lines.

        Every line which starts with `level` tabs worth of spaces has that
        prefix removed; all other lines are passed through unchanged.
        """
        width = self.tab_length * level
        indent = ' ' * width
        return '\n'.join(
            line[width:] if line.startswith(indent) else line
            for line in text.split('\n')
        )

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Test for block type. Must be overridden by subclasses.

        As the parser loops through processors, it will call the `test`
        method on each to determine if the given block of text is of that
        type. This method must return a boolean `True` or `False`. The
        actual method of testing is left to the needs of that particular
        block type. It could be as simple as `block.startswith(some_string)`
        or a complex regular expression. As the block type may be different
        depending on the parent of the block (i.e. inside a list), the parent
        `etree` element is also provided and may be used as part of the test.

        Keyword arguments:
            parent: An `etree` element which will be the parent of the block.
            block: A block of text from the source which has been split at blank lines.
        """
        pass  # pragma: no cover

    def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
        """ Run processor. Must be overridden by subclasses.

        When the parser determines the appropriate type of a block, the parser
        will call the corresponding processor's `run` method. This method
        should parse the individual lines of the block and append them to
        the `etree`.

        Note that both the `parent` and `blocks` arguments are references
        to objects which should be edited in place. Each processor must make
        changes to the existing objects as there is no mechanism to return
        new/different objects to replace them.

        This means that this method should be adding `SubElements` or adding text
        to the parent, and should remove (`pop`) or add (`insert`) items to
        the list of blocks.

        If `False` is returned, this will have the same effect as returning `False`
        from the `test` method.

        Keyword arguments:
            parent: An `etree` element which is the parent of the current block.
            blocks: A list of all remaining blocks of the document.
        """
        pass  # pragma: no cover
151
152
class ListIndentProcessor(BlockProcessor):
    """ Process children of list items.

    Example

        * a list item
            process this part

            or this part

    """

    ITEM_TYPES = ['li']
    """ List of tags used for list items. """
    LIST_TYPES = ['ul', 'ol']
    """ Types of lists this processor can operate on. """

    def __init__(self, *args):
        super().__init__(*args)
        # Matches one or more whole tabs (runs of `tab_length` spaces) at the
        # start of a block; `group(1)` is the complete leading indent.
        self.INDENT_RE = re.compile(r'^(([ ]{%s})+)' % self.tab_length)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` for an indented block which belongs to a list.

        The block must start with at least one tab of indent, the parser must
        not already be in the `detabbed` state, and `parent` must either be a
        list item or have a list as its last child.
        """
        # Coerce to `bool`: the bare `and`/`or` chain could otherwise hand
        # back the integer `0` from `len(parent)`.
        return bool(
            block.startswith(' ' * self.tab_length) and
            not self.parser.state.isstate('detabbed') and
            (parent.tag in self.ITEM_TYPES or
             (len(parent) and parent[-1].tag in self.LIST_TYPES))
        )

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Detab the first block and parse it as a child of the matching list item. """
        block = blocks.pop(0)
        level, sibling = self.get_level(parent, block)
        block = self.looseDetab(block, level)

        # Flag the state so that `test` does not match the detabbed block again.
        self.parser.state.set('detabbed')
        if parent.tag in self.ITEM_TYPES:
            # It's possible that this parent has a `ul` or `ol` child list
            # with a member. If that is the case, then that should be the
            # parent. This is intended to catch the edge case of an indented
            # list whose first member was parsed previous to this point
            # see `OListProcessor`
            if len(parent) and parent[-1].tag in self.LIST_TYPES:
                self.parser.parseBlocks(parent[-1], [block])
            else:
                # The parent is already a `li`. Just parse the child block.
                self.parser.parseBlocks(parent, [block])
        elif sibling.tag in self.ITEM_TYPES:
            # The sibling is a `li`. Use it as parent.
            self.parser.parseBlocks(sibling, [block])
        elif len(sibling) and sibling[-1].tag in self.ITEM_TYPES:
            # The parent is a list (`ol` or `ul`) which has children.
            # Assume the last child `li` is the parent of this block.
            if sibling[-1].text:
                # If the parent `li` has text, that text needs to be moved to a `p`
                # The `p` must be 'inserted' at beginning of list in the event
                # that other children already exist i.e.; a nested sub-list.
                p = etree.Element('p')
                p.text = sibling[-1].text
                sibling[-1].text = ''
                sibling[-1].insert(0, p)
            self.parser.parseChunk(sibling[-1], block)
        else:
            self.create_item(sibling, block)
        self.parser.state.reset()

    def create_item(self, parent: etree.Element, block: str) -> None:
        """ Create a new `li` and parse the block with it as the parent. """
        li = etree.SubElement(parent, 'li')
        self.parser.parseBlocks(li, [block])

    def get_level(self, parent: etree.Element, block: str) -> tuple[int, etree.Element]:
        """ Get level of indentation based on list level.

        Returns:
            A tuple of the list nesting level reached and the element found
            at that level by descending through the last children of `parent`.
        """
        # Get indent level as a whole number of tabs. `INDENT_RE` only ever
        # matches complete tabs, so floor division is exact here.
        m = self.INDENT_RE.match(block)
        indent_level = len(m.group(1)) // self.tab_length if m else 0
        if self.parser.state.isstate('list'):
            # We're in a tight-list - so we already are at correct parent.
            level = 1
        else:
            # We're in a loose-list - so we need to find parent.
            level = 0
        # Step through children of tree to find matching indent level.
        while indent_level > level:
            child = self.lastChild(parent)
            if (child is not None and
                    (child.tag in self.LIST_TYPES or child.tag in self.ITEM_TYPES)):
                if child.tag in self.LIST_TYPES:
                    # Only list containers add a nesting level; items do not.
                    level += 1
                parent = child
            else:
                # No more child levels. If we're short of `indent_level`,
                # we have a code block. So we stop here.
                break
        return level, parent
249
250
class CodeBlockProcessor(BlockProcessor):
    """ Process code blocks. """

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` when the block starts with a full tab of indent. """
        indent = ' ' * self.tab_length
        return block.startswith(indent)

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Turn the first block into a `<pre><code>` element or extend the previous one.

        Only the leading indented (or blank) lines are taken as code. Any
        lines left over are pushed back onto `blocks` for later parsing.
        """
        previous = self.lastChild(parent)
        raw = blocks.pop(0)
        # Split the block into its indented code and whatever trails it.
        indented, leftover = self.detab(raw)
        escaped = util.code_escape(indented.rstrip())
        extends_previous = (
            previous is not None and previous.tag == "pre" and
            len(previous) and previous[0].tag == "code"
        )
        if extends_previous:
            # A blank line does not begin a new code block, so this block is
            # joined onto the previous one, restoring the line break which
            # was lost when the source was split into blocks.
            code = previous[0]
            code.text = util.AtomicString(
                '{}\n{}\n'.format(code.text, escaped)
            )
        else:
            # No code block to continue. Build new `pre` and `code` elements.
            pre = etree.SubElement(parent, 'pre')
            code = etree.SubElement(pre, 'code')
            code.text = util.AtomicString('%s\n' % escaped)
        if leftover:
            # Lines which followed the indented part are queued as the next
            # block to be processed.
            blocks.insert(0, leftover)
282
283
class BlockQuoteProcessor(BlockProcessor):
    """ Process blockquotes. """

    # A `>` preceded by up to three spaces at the start of the block or of any
    # later line. `group(2)` holds the rest of that line after one optional space.
    RE = re.compile(r'(^|\n)[ ]{0,3}>[ ]?(.*)')

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` if the block holds a quote marker and `util.nearing_recursion_limit()` is false. """
        return bool(self.RE.search(block)) and not util.nearing_recursion_limit()

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Parse the first block into a new or continued `blockquote` element. """
        block = blocks.pop(0)
        found = self.RE.search(block)
        if found:
            start = found.start()
            # Whatever comes ahead of the quote is parsed first, recursively.
            self.parser.parseBlocks(parent, [block[:start]])
            # Strip the quote marker from every remaining line.
            block = '\n'.join(map(self.clean, block[start:].split('\n')))
        previous = self.lastChild(parent)
        if previous is None or previous.tag != "blockquote":
            # Nothing to continue, so start a fresh blockquote element.
            quote = etree.SubElement(parent, 'blockquote')
        else:
            # The preceding element is a blockquote. Keep adding to it.
            quote = previous
        # Parse the content with the quote as parent. The parser state is
        # switched so blockquotes embedded in lists use `p` tags.
        self.parser.state.set('blockquote')
        self.parser.parseChunk(quote, block)
        self.parser.state.reset()

    def clean(self, line: str) -> str:
        """ Strip the leading `>` marker from a single line.

        A line holding nothing but `>` becomes an empty string and a line
        without a marker is returned untouched.
        """
        if line.strip() == ">":
            return ""
        matched = self.RE.match(line)
        return matched.group(2) if matched else line
325
326
class OListProcessor(BlockProcessor):
    """ Process ordered list blocks. """

    TAG: str = 'ol'
    """ The tag used for the wrapping element. """
    STARTSWITH: str = '1'
    """
    The integer (as a string) with which the list starts. For example, if a list is initialized as
    `3. Item`, then the `ol` tag will be assigned an HTML attribute of `start="3"` (only when
    `LAZY_OL` is `False`). Default: `"1"`.
    """
    LAZY_OL: bool = True
    """ Ignore `STARTSWITH` if `True`. """
    SIBLING_TAGS: list[str] = ['ol', 'ul']
    """
    Markdown does not require the type of a new list item match the previous list item type.
    This is the list of types which can be mixed.
    """

    # Extracts the leading integer from an ordered item marker such as `3.`.
    # Compiled once here rather than on every call of `get_items`.
    _INTEGER_RE = re.compile(r'(\d+)')

    def __init__(self, parser: BlockParser):
        super().__init__(parser)
        # Detect an item (`1. item`). `group(1)` contains contents of item.
        self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1))
        # Detect items on secondary lines. they can be of either list type.
        self.CHILD_RE = re.compile(r'^[ ]{0,%d}((\d+\.)|[*+-])[ ]+(.*)' %
                                   (self.tab_length - 1))
        # Detect indented (nested) items of either type
        self.INDENT_RE = re.compile(r'^[ ]{%d,%d}((\d+\.)|[*+-])[ ]+.*' %
                                    (self.tab_length, self.tab_length * 2 - 1))

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` when the block starts with a list item of this type. """
        return bool(self.RE.match(block))

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Parse the first block as list items and attach them to a new or existing list. """
        # Check for multiple items in one block.
        items = self.get_items(blocks.pop(0))
        sibling = self.lastChild(parent)

        if sibling is not None and sibling.tag in self.SIBLING_TAGS:
            # Previous block was a list item, so set that as parent
            lst = sibling
            last_item = lst[-1]
            # make sure previous item is in a `p` - if the item has text,
            # then it isn't in a `p`
            if last_item.text:
                # since it's possible there are other children for this
                # sibling, we can't just `SubElement` the `p`, we need to
                # insert it as the first item.
                p = etree.Element('p')
                p.text = last_item.text
                last_item.text = ''
                last_item.insert(0, p)
            # if the last item has a tail, then the tail needs to be put in a `p`
            # likely only when a header is not followed by a blank line
            lch = self.lastChild(last_item)
            if lch is not None and lch.tail:
                p = etree.SubElement(last_item, 'p')
                p.text = lch.tail.lstrip()
                lch.tail = ''

            # parse first block differently as it gets wrapped in a `p`.
            li = etree.SubElement(lst, 'li')
            self.parser.state.set('looselist')
            firstitem = items.pop(0)
            self.parser.parseBlocks(li, [firstitem])
            self.parser.state.reset()
        elif parent.tag in ['ol', 'ul']:
            # this catches the edge case of a multi-item indented list whose
            # first item is in a blank parent-list item:
            #     * * subitem1
            #         * subitem2
            # see also `ListIndentProcessor`
            lst = parent
        else:
            # This is a new list so create parent with appropriate tag.
            lst = etree.SubElement(parent, self.TAG)
            # Check if a custom start integer is set
            if not self.LAZY_OL and self.STARTSWITH != '1':
                lst.attrib['start'] = self.STARTSWITH

        self.parser.state.set('list')
        # Loop through items in block, recursively parsing each with the
        # appropriate parent.
        for item in items:
            if item.startswith(' ' * self.tab_length):
                # Item is indented. Parse with last item as parent
                self.parser.parseBlocks(lst[-1], [item])
            else:
                # New item. Create `li` and parse with it as parent
                li = etree.SubElement(lst, 'li')
                self.parser.parseBlocks(li, [item])
        self.parser.state.reset()

    def get_items(self, block: str) -> list[str]:
        """ Break a block into list items.

        As a side effect, when `TAG` is `'ol'` the number of the first item
        is stored (as a string) in `self.STARTSWITH`.
        """
        items: list[str] = []
        for line in block.split('\n'):
            m = self.CHILD_RE.match(line)
            if m:
                # This is a new list item
                # Check first item for the start index
                if not items and self.TAG == 'ol':
                    # Detect the integer value of first list item
                    self.STARTSWITH = self._INTEGER_RE.match(m.group(1)).group()
                # Append to the list
                items.append(m.group(3))
            elif self.INDENT_RE.match(line):
                # This is an indented (possibly nested) item.
                if items[-1].startswith(' ' * self.tab_length):
                    # Previous item was indented. Append to that item.
                    items[-1] = '{}\n{}'.format(items[-1], line)
                else:
                    items.append(line)
            else:
                # This is another line of previous item. Append to that item.
                items[-1] = '{}\n{}'.format(items[-1], line)
        return items
443
444
class UListProcessor(OListProcessor):
    """ Process unordered list blocks. """

    TAG: str = 'ul'
    """ The tag used for the wrapping element. """

    def __init__(self, parser: BlockParser):
        super().__init__(parser)
        # A bullet item (`* item`, `+ item` or `- item`) may be indented by at
        # most one space less than a full tab. `group(1)` contains contents of item.
        max_indent = self.tab_length - 1
        self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % max_indent)
455
456
class HashHeaderProcessor(BlockProcessor):
    """ Process Hash Headers. """

    # Detect a header at start of any line in block. `level` captures the one
    # to six leading `#` characters and `header` the text that follows, with
    # any trailing run of `#` left out.
    RE = re.compile(r'(?:^|\n)(?P<level>#{1,6})(?P<header>(?:\\.|[^\\])*?)#*(?:\n|$)')

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` when any line of the block is a hash header. """
        return bool(self.RE.search(block))

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Create an `h1`-`h6` element from the first header found in the block.

        Lines before the header are parsed first; lines after it are pushed
        back onto `blocks` for later parsing.
        """
        block = blocks.pop(0)
        m = self.RE.search(block)
        if m:
            before = block[:m.start()]  # All lines before header
            after = block[m.end():]     # All lines after header
            if before:
                # As the header was not the first line of the block and the
                # lines before the header must be parsed first,
                # recursively parse these lines as a block.
                self.parser.parseBlocks(parent, [before])
            # Create header using named groups from RE
            h = etree.SubElement(parent, 'h%d' % len(m.group('level')))
            h.text = m.group('header').strip()
            if after:
                # Insert remaining lines as first block for future parsing.
                if self.parser.state.isstate('looselist'):
                    # This is a weird edge case where a header is a child of a loose list
                    # and there is no blank line after the header. To ensure proper
                    # parsing, the line(s) after need to be detabbed. See #1443.
                    after = self.looseDetab(after)
                blocks.insert(0, after)
        else:  # pragma: no cover
            # This should never happen, but just in case...
            # `Logger.warn` is deprecated; `warning` with lazy arguments
            # produces the same message.
            logger.warning("We've got a problem header: %r", block)
491
492
class SetextHeaderProcessor(BlockProcessor):
    """ Process Setext-style Headers. """

    # A Setext header is a line of text underlined by a run of `=` or `-`
    # characters. It must occupy the first 2 lines of the block.
    RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` when the block begins with an underlined header. """
        matched = self.RE.match(block)
        return bool(matched)

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Create an `h1` or `h2` from the first two lines of the block.

        Any further lines are pushed back onto `blocks` for later parsing.
        """
        lines = blocks.pop(0).split('\n')
        # An `=` underline makes a level 1 header; anything else is level 2.
        level = 1 if lines[1].startswith('=') else 2
        header = etree.SubElement(parent, 'h%d' % level)
        header.text = lines[0].strip()
        remaining = lines[2:]
        if remaining:
            # Lines below the underline are queued for later processing.
            blocks.insert(0, '\n'.join(remaining))
514
515
class HRProcessor(BlockProcessor):
    """ Process Horizontal Rules. """

    # Python's `re` module doesn't officially support atomic grouping. However you can fake it.
    # See https://stackoverflow.com/a/13577411/866026
    RE = r'^[ ]{0,3}(?=(?P<atomicgroup>(-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,}))(?P=atomicgroup)[ ]*$'
    # Compiled with `MULTILINE` so a rule is found on any line of a block.
    SEARCH_RE = re.compile(RE, re.MULTILINE)

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` if a rule is found, keeping the match for `run` to use. """
        found = self.SEARCH_RE.search(block)
        if found is None:
            return False
        # `run` reads the match back from the instance.
        self.match = found
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Insert an `hr`, parsing text above it first and queueing text below it. """
        block = blocks.pop(0)
        rule = self.match
        # Text above the rule has to be parsed ahead of it.
        leading = block[:rule.start()].rstrip('\n')
        if leading:
            self.parser.parseBlocks(parent, [leading])
        etree.SubElement(parent, 'hr')
        # Text below the rule is handed back for later parsing.
        trailing = block[rule.end():].lstrip('\n')
        if trailing:
            blocks.insert(0, trailing)
548
549
class EmptyBlockProcessor(BlockProcessor):
    """ Process blocks that are empty or start with an empty line. """

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Return `True` for an empty block or one whose first character is a newline. """
        return not block or block.startswith('\n')

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Consume the blank space and, after a code block, keep it as whitespace. """
        block = blocks.pop(0)
        if block:
            # The block begins with a blank line. Consume just that one
            # line and queue everything after it for later.
            padding = '\n'
            leftover = block[1:]
            if leftover:
                blocks.insert(0, leftover)
        else:
            # A wholly empty block stands for two line breaks.
            padding = '\n\n'
        previous = self.lastChild(parent)
        if previous is None or previous.tag != 'pre':
            return
        if not len(previous) or previous[0].tag != 'code':
            return
        # The preceding element is a code block, so the blank space is
        # appended to it in order to preserve whitespace.
        code = previous[0]
        code.text = util.AtomicString('{}{}'.format(code.text, padding))
575
576
class ReferenceProcessor(BlockProcessor):
    """ Process link references. """

    # `group(1)` is the bracketed label, `group(2)` the link target and
    # `group(5)` or `group(6)` an optional quoted or parenthesised title.
    RE = re.compile(
        r'^[ ]{0,3}\[([^\[\]]*)\]:[ ]*\n?[ ]*([^\s]+)[ ]*(?:\n[ ]*)?((["\'])(.*)\4[ ]*|\((.*)\)[ ]*)?$', re.MULTILINE
    )

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Always return `True`; `run` decides whether the block holds a reference. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> bool:
        """ Store the first link reference found in the block.

        Returns:
            `True` if a reference was found and saved on the `Markdown`
            instance, otherwise `False` with the block put back untouched.
        """
        block = blocks.pop(0)
        found = self.RE.search(block)
        if found is None:
            # Nothing to record. Hand the block back as it was.
            blocks.insert(0, block)
            return False

        ref_id = found.group(1).strip().lower()
        link = found.group(2).lstrip('<').rstrip('>')
        title = found.group(5) or found.group(6)
        self.parser.md.references[ref_id] = (link, title)

        trailing = block[found.end():]
        leading = block[:found.start()]
        if trailing.strip():
            # Text following the reference goes back as a block of its own.
            blocks.insert(0, trailing.lstrip('\n'))
        if leading.strip():
            # Text preceding the reference is queued last so it is handled first.
            blocks.insert(0, leading.rstrip('\n'))
        return True
604
605
class ParagraphProcessor(BlockProcessor):
    """ Process Paragraph blocks. """

    def test(self, parent: etree.Element, block: str) -> bool:
        """ Always return `True`; any block can be treated as a paragraph. """
        return True

    def run(self, parent: etree.Element, blocks: list[str]) -> None:
        """ Add the first block to `parent` as paragraph text; blank blocks are dropped. """
        block = blocks.pop(0)
        if not block.strip():
            # Nothing but whitespace. Discard it.
            return

        if not self.parser.state.isstate('list'):
            # The ordinary case: wrap the text in a new `p` element.
            paragraph = etree.SubElement(parent, 'p')
            paragraph.text = block.lstrip()
            return

        # Inside a tight-list the text is attached directly, without a `p`.
        #
        # An existing child is expected mostly when a header in the list
        # item has no blank line after it. For example:
        #
        #     * # Header
        #     Line 2 of list item - not part of header.
        sibling = self.lastChild(parent)
        if sibling is None:
            # No children yet, so the text belongs to the parent itself.
            if parent.text:
                parent.text = '{}\n{}'.format(parent.text, block)
            else:
                parent.text = block.lstrip()
        elif sibling.tail:
            # Extend the text which already follows the last child.
            sibling.tail = '{}\n{}'.format(sibling.tail, block)
        else:
            sibling.tail = '\n%s' % block