Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/postprocessors.py: 92%

1# Python Markdown

3# A Python implementation of John Gruber's Markdown.

5# Documentation: https://python-markdown.github.io/

6# GitHub: https://github.com/Python-Markdown/markdown/

7# PyPI: https://pypi.org/project/Markdown/

9# Started by Manfred Stienstra (http://www.dwerg.net/).

10# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).

11# Currently maintained by Waylan Limberg (https://github.com/waylan),

12# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).

18# License: BSD (see LICENSE.md for details).

20"""

22Post-processors run on the text of the entire document after it has been serialized into a string.

23Postprocessors should be used to work with the text just before output. Usually, they are used to add

24back sections that were extracted in a preprocessor, fix up outgoing encodings, or wrap the whole

25document.

27"""

29from __future__ import annotations

31from typing import TYPE_CHECKING, Any

32from . import util

33import re

35if TYPE_CHECKING: # pragma: no cover

36 from markdown import Markdown

def build_postprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Postprocessor]:
    """
    Build the registry of default postprocessors for Markdown.

    `md` is handed to `RawHtmlPostprocessor`; `AndSubstitutePostprocessor`
    is created without arguments. Extra keyword arguments are accepted but
    not used here.

    Returns a `util.Registry` holding `raw_html` (priority 30) and
    `amp_substitute` (priority 20).
    """
    registry = util.Registry()
    # (processor instance, registered name, priority)
    defaults = (
        (RawHtmlPostprocessor(md), 'raw_html', 30),
        (AndSubstitutePostprocessor(), 'amp_substitute', 20),
    )
    for processor, name, priority in defaults:
        registry.register(processor, name, priority)
    return registry

class Postprocessor(util.Processor):
    """
    Base class for postprocessors, which run after the ElementTree is
    converted back into text.

    A postprocessor provides a `run` method that receives the document as a
    text string, changes it as needed and returns a text string.

    Every postprocessor must extend `Postprocessor`.

    """

    def run(self, text: str) -> str:
        """
        Hook for subclasses: receive the html document as one text string
        and return the (possibly modified) string.

        This base implementation does nothing and returns `None`.

        """
        pass  # pragma: no cover

class RawHtmlPostprocessor(Postprocessor):
    """ Put stashed raw html back into the document. """

    # Captures the tag name at the very start of an html string
    # (after `<` and an optional `/`).
    BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)')

    def run(self, text: str) -> str:
        """
        Replace every html stash placeholder in `text` with its stashed html.

        Returns `text` unchanged when the stash counter is zero. A placeholder
        whose index is not below the stash counter is left as it is. Restored
        html is scanned again, so placeholders nested inside it are restored
        as well.
        """
        if not self.md.htmlStash.html_counter:
            return text

        base_placeholder = util.HTML_PLACEHOLDER % r'([0-9]+)'
        # Group 1: placeholder wrapped in `<p>...</p>`; group 2: bare placeholder.
        pattern = re.compile(f'<p>{ base_placeholder }</p>|{ base_placeholder }')

        def restore(match: re.Match[str]) -> str:
            in_paragraph = bool(match.group(1))
            index = int(match.group(1) if in_paragraph else match.group(2))
            if index >= self.md.htmlStash.html_counter:
                # Not one of our stash entries; keep the matched text.
                return match.group(0)
            html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[index])
            # Keep the paragraph wrapper only around html that is not block-level.
            if in_paragraph and not self.isblocklevel(html):
                html = f"<p>{html}</p>"
            return pattern.sub(restore, html)

        return pattern.sub(restore, text)

    def isblocklevel(self, html: str) -> bool:
        """
        Check whether a block of HTML is block-level.

        Returns `False` when `html` does not start with a tag. A tag name
        starting with `!`, `?`, `@` or `%` (comment, PHP etc...) counts as
        block-level; any other tag name is passed to `self.md.is_block_level`.
        """
        found = self.BLOCK_LEVEL_REGEX.match(html)
        if found is None:
            return False
        tag = found.group(1)
        if tag.startswith(('!', '?', '@', '%')):
            # Comment, PHP etc...
            return True
        return self.md.is_block_level(tag)

    def stash_to_string(self, text: str) -> str:
        """ Return the stashed object converted with `str()`. """
        return str(text)

108

109

class AndSubstitutePostprocessor(Postprocessor):
    """ Restore valid entities. """

    def run(self, text: str) -> str:
        """ Return `text` with every `util.AMP_SUBSTITUTE` replaced by `&`. """
        return text.replace(util.AMP_SUBSTITUTE, "&")

116

117

@util.deprecated(
    "This class is deprecated and will be removed in the future; "
    "use [`UnescapeTreeprocessor`][markdown.treeprocessors.UnescapeTreeprocessor] instead."
)
class UnescapePostprocessor(Postprocessor):
    """ Restore escaped chars. """

    # A run of digits enclosed between `util.STX` and `util.ETX`.
    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))

    def unescape(self, m: re.Match[str]) -> str:
        """ Return the character whose code point is the captured number. """
        code_point = int(m.group(1))
        return chr(code_point)

    def run(self, text: str) -> str:
        """ Replace every match of `RE` in `text` using `unescape`. """
        restored = self.RE.sub(self.unescape, text)
        return restored