Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/markdown/postprocessors.py: 72%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

57 statements  

1# Python Markdown 

2 

3# A Python implementation of John Gruber's Markdown. 

4 

5# Documentation: https://python-markdown.github.io/ 

6# GitHub: https://github.com/Python-Markdown/markdown/ 

7# PyPI: https://pypi.org/project/Markdown/ 

8 

9# Started by Manfred Stienstra (http://www.dwerg.net/). 

10# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). 

11# Currently maintained by Waylan Limberg (https://github.com/waylan), 

12# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). 

13 

14# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later) 

15# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) 

16# Copyright 2004 Manfred Stienstra (the original version) 

17 

18# License: BSD (see LICENSE.md for details). 

19 

20""" 

21 

22Post-processors run on the text of the entire document after it has been serialized into a string. 

23Postprocessors should be used to work with the text just before output. Usually, they are used to add 

24back sections that were extracted in a preprocessor, fix up outgoing encodings, or wrap the whole 

25document. 

26 

27""" 

28 

29from __future__ import annotations 

30 

31from collections import OrderedDict 

32from typing import TYPE_CHECKING, Any 

33from . import util 

34import re 

35 

36if TYPE_CHECKING: # pragma: no cover 

37 from markdown import Markdown 

38 

39 

def build_postprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Postprocessor]:
    """
    Create the registry holding Markdown's default postprocessors.

    A `RawHtmlPostprocessor` bound to `md` is registered as `'raw_html'`
    with the value `30`, and an `AndSubstitutePostprocessor` is registered
    as `'amp_substitute'` with the value `20` (the third argument to
    `register` is presumably a priority -- confirm against `util.Registry`).

    Any extra keyword arguments are accepted but not used here.

    """
    registry = util.Registry()
    registry.register(RawHtmlPostprocessor(md), 'raw_html', 30)
    registry.register(AndSubstitutePostprocessor(), 'amp_substitute', 20)
    return registry

46 

47 

class Postprocessor(util.Processor):
    """
    Base class for every postprocessor.

    Postprocessors are run after the ElementTree is converted back into
    text. Each one provides a `run` method which receives the document as
    a text string, changes it as needed, and hands back a text string.

    All postprocessors must extend `Postprocessor`.

    """

    def run(self, text: str) -> str:
        """
        Process the serialized document.

        Subclasses of `Postprocessor` should override this method. It is
        given the html document as one text string and should return a
        (possibly modified) string. This base implementation does nothing.

        """
        pass  # pragma: no cover

67 

68 

class RawHtmlPostprocessor(Postprocessor):
    """ Put stashed raw html back into the document. """

    # Captures the tag name (or leading marker such as `!` or `?`) of an
    # opening or closing tag found at the very start of a string.
    BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)')

    def run(self, text: str) -> str:
        """
        Replace every html stash placeholder in `text` with its stored html.

        The method calls itself again on the result whenever a pass changed
        the text, so placeholders that only appear inside restored html are
        substituted as well. The text is returned once a pass changes nothing.

        """
        stash = self.md.htmlStash

        # Map placeholder text -> html. Block-level html gets an extra
        # entry keyed on the `<p>`-wrapped placeholder so that the wrapping
        # paragraph is dropped when it is substituted.
        replacements = OrderedDict()
        for index in range(stash.html_counter):
            html = self.stash_to_string(stash.rawHtmlBlocks[index])
            if self.isblocklevel(html):
                wrapped_key = "<p>{}</p>".format(stash.get_placeholder(index))
                replacements[wrapped_key] = html
            replacements[stash.get_placeholder(index)] = html

        # Nothing stashed: there is nothing to restore.
        if not replacements:
            return text

        def substitute_match(m: re.Match[str]) -> str:
            matched = m.group(0)
            if matched in replacements:
                return replacements[matched]

            # A `<p>`-wrapped placeholder for non-block html: strip the
            # leading `<p>` and trailing `</p>`, substitute, and re-wrap.
            inner = matched[3:-4]
            if inner in replacements:
                return f'<p>{ replacements[inner] }</p>'

            # Unknown placeholder: leave the text as it was.
            return matched

        base_placeholder = util.HTML_PLACEHOLDER % r'([0-9]+)'
        pattern = re.compile(f'<p>{ base_placeholder }</p>|{ base_placeholder }')
        processed_text = pattern.sub(substitute_match, text)

        if processed_text != text:
            # Something was replaced; go again in case new placeholders appeared.
            return self.run(processed_text)
        return processed_text

    def isblocklevel(self, html: str) -> bool:
        """
        Check whether a chunk of html is block-level.

        Returns `False` when `html` does not start with a tag. A tag whose
        name begins with `!`, `?`, `@` or `%` counts as block-level;
        otherwise the decision is delegated to `self.md.is_block_level`.

        """
        found = self.BLOCK_LEVEL_REGEX.match(html)
        if found is None:
            return False

        tag = found.group(1)
        if tag[0] in ('!', '?', '@', '%'):
            # Comment, PHP etc...
            return True
        return self.md.is_block_level(tag)

    def stash_to_string(self, text: str) -> str:
        """ Return the stashed object as a string (via `str`). """
        return str(text)

120 

121 

class AndSubstitutePostprocessor(Postprocessor):
    """ Restore valid entities """

    def run(self, text: str) -> str:
        """ Return `text` with each `util.AMP_SUBSTITUTE` replaced by `&`. """
        return text.replace(util.AMP_SUBSTITUTE, "&")

128 

129 

@util.deprecated(
    "This class is deprecated and will be removed in the future; "
    "use [`UnescapeTreeprocessor`][markdown.treeprocessors.UnescapeTreeprocessor] instead."
)
class UnescapePostprocessor(Postprocessor):
    """ Turn escaped character markers back into the characters themselves. """

    # A run of digits enclosed between `util.STX` and `util.ETX`.
    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))

    def unescape(self, m: re.Match[str]) -> str:
        """ Return the character whose code point is the captured number. """
        codepoint = int(m.group(1))
        return chr(codepoint)

    def run(self, text: str) -> str:
        """ Replace every match of `RE` in `text` using `unescape`. """
        return self.RE.sub(self.unescape, text)