Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/markdown/postprocessors.py: 92%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

52 statements  

1# Python Markdown 

2 

3# A Python implementation of John Gruber's Markdown. 

4 

5# Documentation: https://python-markdown.github.io/ 

6# GitHub: https://github.com/Python-Markdown/markdown/ 

7# PyPI: https://pypi.org/project/Markdown/ 

8 

9# Started by Manfred Stienstra (http://www.dwerg.net/). 

10# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org). 

11# Currently maintained by Waylan Limberg (https://github.com/waylan), 

12# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser). 

13 

14# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later) 

15# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b) 

16# Copyright 2004 Manfred Stienstra (the original version) 

17 

18# License: BSD (see LICENSE.md for details). 

19 

20""" 

21 

22Post-processors run on the text of the entire document after it has been serialized into a string. 

23Postprocessors should be used to work with the text just before output. Usually, they are used to add 

24back sections that were extracted in a preprocessor, fix up outgoing encodings, or wrap the whole 

25document. 

26 

27""" 

28 

29from __future__ import annotations 

30 

31from typing import TYPE_CHECKING, Any 

32from . import util 

33import re 

34 

35if TYPE_CHECKING: # pragma: no cover 

36 from markdown import Markdown 

37 

38 

def build_postprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Postprocessor]:
    """ Create a registry holding the default set of postprocessors.

    `RawHtmlPostprocessor` is registered as `'raw_html'` with priority 30 and
    `AndSubstitutePostprocessor` as `'amp_substitute'` with priority 20.
    Extra keyword arguments are accepted but not used here.
    """
    registry = util.Registry()
    defaults = (
        (RawHtmlPostprocessor(md), 'raw_html', 30),
        (AndSubstitutePostprocessor(), 'amp_substitute', 20),
    )
    for processor, name, priority in defaults:
        registry.register(processor, name, priority)
    return registry

45 

46 

class Postprocessor(util.Processor):
    """
    Base class for processors that run after the ElementTree is converted
    back into text.

    A postprocessor exposes a `run` method which receives the document as a
    text string, changes it as needed and hands back a text string.

    All postprocessors must extend `Postprocessor`.

    """

    def run(self, text: str) -> str:
        """
        Hook for subclasses: receive the html document as one text string
        and return the (possibly modified) string.

        The base implementation does nothing and returns `None`.

        """
        pass  # pragma: no cover

66 

67 

class RawHtmlPostprocessor(Postprocessor):
    """ Put stashed raw html back into the document. """

    # Captures the tag name of a leading opening or closing tag.
    BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)')

    def run(self, text: str) -> str:
        """ Replace every html stash placeholder in `text` with its stashed html. """
        # Nothing was stashed, so there is nothing to restore.
        if not self.md.htmlStash.html_counter:
            return text

        placeholder = util.HTML_PLACEHOLDER % r'([0-9]+)'
        # Group 1 is set when the placeholder sits alone inside `<p>...</p>`,
        # group 2 when it appears bare.
        pattern = re.compile(f'<p>{ placeholder }</p>|{ placeholder }')

        def restore(match: re.Match[str]) -> str:
            paragraph_key = match.group(1)
            in_paragraph = bool(paragraph_key)
            index = int(paragraph_key if in_paragraph else match.group(2))
            # Indexes at or beyond the stash counter are left exactly as found.
            if index >= self.md.htmlStash.html_counter:
                return match.group(0)
            html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[index])
            # Keep the paragraph wrapper only around html that is not block-level.
            if in_paragraph and not self.isblocklevel(html):
                html = f"<p>{html}</p>"
            # The restored html may itself contain placeholders; expand those too.
            return pattern.sub(restore, html)

        return pattern.sub(restore, text)

    def isblocklevel(self, html: str) -> bool:
        """ Check if a block of HTML is block-level. """
        found = self.BLOCK_LEVEL_REGEX.match(html)
        if not found:
            return False
        tag = found.group(1)
        # Comment, PHP etc...
        if tag.startswith(('!', '?', '@', '%')):
            return True
        return self.md.is_block_level(tag)

    def stash_to_string(self, text: str) -> str:
        """ Return the stashed object as a string. """
        as_string = str(text)
        return as_string

108 

109 

class AndSubstitutePostprocessor(Postprocessor):
    """ Swap the ampersand stand-in back to a literal `&`. """

    def run(self, text: str) -> str:
        """ Return `text` with every `util.AMP_SUBSTITUTE` replaced by `&`. """
        return text.replace(util.AMP_SUBSTITUTE, "&")

116 

117 

@util.deprecated(
    "This class is deprecated and will be removed in the future; "
    "use [`UnescapeTreeprocessor`][markdown.treeprocessors.UnescapeTreeprocessor] instead."
)
class UnescapePostprocessor(Postprocessor):
    """ Turn escaped-character placeholders back into the characters. """

    # A run of digits enclosed between `util.STX` and `util.ETX`.
    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))

    def unescape(self, m: re.Match[str]) -> str:
        """ Return the character whose code point is the number captured in `m`. """
        code_point = int(m.group(1))
        return chr(code_point)

    def run(self, text: str) -> str:
        """ Return `text` with every match of `RE` replaced by its character. """
        restored = self.RE.sub(self.unescape, text)
        return restored