Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mistune/inline_parser.py: 99%

110 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-03 06:10 +0000

1import re 

2from .scanner import ScannerParser 

3from .util import PUNCTUATION, ESCAPE_TEXT, escape_url, unikey 

4 

# Regex fragment for an HTML tag name: a letter followed by letters,
# digits or hyphens.
HTML_TAGNAME = r'[A-Za-z][A-Za-z0-9-]*'

# Regex fragment for zero or more HTML attributes.  Each attribute is
# whitespace, an attribute name, and an optional ``= value`` where the value
# is unquoted, single-quoted or double-quoted.
HTML_ATTRIBUTES = (
    r'(?:\s+[A-Za-z_:][A-Za-z0-9_.:-]*'
    + r'(?:\s*=\s*(?:[^ "\'=<>`]+|\'[^\']*?\'|"[^\"]*?"))?)*'
)

# Compiled pattern for a backslash followed by one PUNCTUATION character;
# group 1 captures the character itself so the backslash can be dropped.
ESCAPE_CHAR = re.compile(''.join((r'\\([', PUNCTUATION, r'])')))

# Regex fragment for the text between the square brackets of a link or image:
# one level of nested brackets, backslash escapes, backtick spans and any
# other non-bracket characters, matched lazily.
LINK_TEXT = r'(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?'

# Regex fragment for a reference label: at most 1000 items, each either an
# ESCAPE_TEXT match or a single character that is not a backslash or bracket.
LINK_LABEL = ''.join((r'(?:[^\\\[\]]|', ESCAPE_TEXT, r'){0,1000}'))

13 

14 

class InlineParser(ScannerParser):
    """Parser for inline (span-level) Markdown syntax.

    Each name in ``RULE_NAMES`` pairs an upper-cased class attribute holding
    a regular expression (``auto_link`` -> ``AUTO_LINK``) with a
    ``parse_<name>`` method.  Every ``parse_*`` method receives the match
    object and the shared ``state`` mapping and returns a token tuple whose
    first item is the token type; the remaining items are passed to the
    renderer method of that name by :meth:`parse`.

    ``state['_in_link']`` is set while link text (or the content of a raw
    ``<a>`` element) is being processed so that links are never nested.
    """

    ESCAPE = ESCAPE_TEXT

    #: link or email syntax::
    #:
    #:     <https://example.com>
    AUTO_LINK = (
        r'(?<!\\)(?:\\\\)*<([A-Za-z][A-Za-z0-9+.-]{1,31}:'
        r"[^ <>]*?|[A-Za-z0-9.!#$%&'*+/=?^_`{|}~-]+@[A-Za-z0-9]"
        r'(?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?'
        r'(?:\.[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?)*)>'
    )

    #: link or image syntax::
    #:
    #:     [text](/link "title")
    #:     ![alt](/src "title")
    STD_LINK = (
        r'!?\[(' + LINK_TEXT + r')\]\(\s*'

        r'(<(?:\\[<>]?|[^\s<>\\])*>|'
        r'(?:\\[()]?|\([^\s\x00-\x1f\\]*\)|[^\s\x00-\x1f()\\])*?)'

        r'(?:\s+('
        r'''"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)'''
        r'))?\s*\)'
    )

    #: Get link from references. References are defined in DEF_LINK in blocks.
    #: The syntax looks like::
    #:
    #:     [an example][id]
    #:
    #:     [id]: https://example.com "optional title"
    REF_LINK = (
        r'!?\[(' + LINK_TEXT + r')\]'
        r'\[(' + LINK_LABEL + r')\]'
    )

    #: Simple form of reference link::
    #:
    #:     [an example]
    #:
    #:     [an example]: https://example.com "optional title"
    REF_LINK2 = r'!?\[(' + LINK_LABEL + r')\]'

    #: emphasis and strong * or _::
    #:
    #:     *emphasis* **strong**
    #:     _emphasis_ __strong__
    ASTERISK_EMPHASIS = (
        r'(\*{1,2})(?=[^\s*])('
        r'(?:(?:(?<!\\)(?:\\\\)*\*)|[^*])+?'
        r')(?<!\\)\1'
    )
    UNDERSCORE_EMPHASIS = (
        r'\b(_{1,2})(?=[^\s_])([\s\S]*?'
        r'(?:' + ESCAPE_TEXT + r'|[^\s_]))\1'
        r'(?!_|[^\s' + PUNCTUATION + r'])\b'
    )

    #: codespan with `::
    #:
    #:     `code`
    CODESPAN = (
        r'(?<!\\|`)(?:\\\\)*(`+)(?!`)([\s\S]+?)(?<!`)\1(?!`)'
    )

    #: linebreak leaves two spaces at the end of line
    LINEBREAK = r'(?:\\| {2,})\n(?!\s*$)'

    INLINE_HTML = (
        r'(?<!\\)<' + HTML_TAGNAME + HTML_ATTRIBUTES + r'\s*/?>|'  # open tag
        r'(?<!\\)</' + HTML_TAGNAME + r'\s*>|'  # close tag
        r'(?<!\\)<!--(?!>|->)(?:(?!--)[\s\S])+?(?<!-)-->|'  # comment
        r'(?<!\\)<\?[\s\S]+?\?>|'
        r'(?<!\\)<![A-Z][\s\S]+?>|'  # doctype
        r'(?<!\\)<!\[CDATA[\s\S]+?\]\]>'  # cdata
    )

    RULE_NAMES = (
        'escape', 'inline_html', 'auto_link',
        'std_link', 'ref_link', 'ref_link2',
        'asterisk_emphasis', 'underscore_emphasis',
        'codespan', 'linebreak',
    )

    def __init__(self, renderer, hard_wrap=False):
        """Create an inline parser bound to *renderer*.

        :param renderer: object used by :meth:`parse` and :meth:`render` to
            turn tokens into output.
        :param hard_wrap: when true, the ``LINEBREAK`` pattern is replaced on
            this instance so that every newline counts as a line break.
        """
        super(InlineParser, self).__init__()
        if hard_wrap:
            #: every new line becomes <br>
            self.LINEBREAK = r' *\n(?!\s*$)'
        self.renderer = renderer
        # Rule set used to re-scan the text of an unresolved reference link:
        # everything except the reference-link rules themselves.
        self.ref_link_rules = [
            name for name in self.RULE_NAMES
            if name not in ('ref_link', 'ref_link2')
        ]

    def parse_escape(self, m, state):
        """Return the matched text without its first character as text."""
        text = m.group(0)[1:]
        return 'text', text

    def parse_auto_link(self, m, state):
        """Turn ``<url>`` or ``<email>`` into a link token.

        Inside another link the match is emitted verbatim as text.  An
        address containing ``@`` that does not already start with
        ``mailto:``, ``http://`` or ``https://`` gets a ``mailto:`` prefix.
        """
        if state.get('_in_link'):
            return 'text', m.group(0)

        text = m.group(1)
        schemes = ('mailto:', 'http://', 'https://')
        if '@' in text and not text.lower().startswith(schemes):
            link = 'mailto:' + text
        else:
            link = text
        return 'link', escape_url(link), text

    def parse_std_link(self, m, state):
        """Handle ``[text](url "title")`` and ``![alt](src "title")``.

        Backslash escapes are removed from the destination and the title, a
        ``<...>`` wrapped destination loses its angle brackets and the title
        loses its surrounding delimiters.  Returns an image token when the
        match starts with ``!``, otherwise whatever :meth:`tokenize_link`
        returns.
        """
        line = m.group(0)
        text = m.group(1)
        link = ESCAPE_CHAR.sub(r'\1', m.group(2))
        if link.startswith('<') and link.endswith('>'):
            link = link[1:-1]

        title = m.group(3)
        if title:
            title = ESCAPE_CHAR.sub(r'\1', title[1:-1])

        if line[0] == '!':
            return 'image', escape_url(link), text, title

        return self.tokenize_link(line, link, text, title, state)

    def parse_ref_link(self, m, state):
        """Handle reference style links and images.

        The lookup key is ``unikey`` of group 2, or of the link text when
        group 2 is empty or missing.  If ``state['def_links']`` has no entry
        for the key, the matched text is scanned again with
        ``self.ref_link_rules`` and the resulting list of tokens is returned.
        Otherwise an image or link token is built from the stored
        ``(link, title)`` pair.
        """
        line = m.group(0)
        text = m.group(1)
        key = unikey(m.group(2) or text)
        def_links = state.get('def_links')
        if not def_links or key not in def_links:
            return list(self._scan(line, state, self.ref_link_rules))

        link, title = def_links[key]
        link = ESCAPE_CHAR.sub(r'\1', link)
        if title:
            title = ESCAPE_CHAR.sub(r'\1', title)

        if line[0] == '!':
            return 'image', escape_url(link), text, title

        return self.tokenize_link(line, link, text, title, state)

    def parse_ref_link2(self, m, state):
        """Handle the short ``[label]`` form via :meth:`parse_ref_link`."""
        return self.parse_ref_link(m, state)

    def tokenize_link(self, line, link, text, title, state):
        """Build a link token, refusing to nest links.

        :param line: the complete matched source text.
        :param link: destination with escapes already removed.
        :param text: raw link text; rendered with :meth:`render`.
        :param title: optional title or ``None``.
        :param state: shared parser state mapping.
        :returns: ``('text', line)`` when already inside a link, otherwise
            ``('link', escaped_url, rendered_text, title)``.
        """
        if state.get('_in_link'):
            return 'text', line
        state['_in_link'] = True
        try:
            text = self.render(text, state)
        finally:
            # Reset the flag even when rendering raises; otherwise a reused
            # state would treat every later link as nested.
            state['_in_link'] = False
        return 'link', escape_url(link), text, title

    def parse_asterisk_emphasis(self, m, state):
        """Handle ``*emphasis*`` and ``**strong**``."""
        return self.tokenize_emphasis(m, state)

    def parse_underscore_emphasis(self, m, state):
        """Handle ``_emphasis_`` and ``__strong__``."""
        return self.tokenize_emphasis(m, state)

    def tokenize_emphasis(self, m, state):
        """Return an emphasis token for a single marker, strong otherwise.

        Group 1 of the match is the marker, group 2 the enclosed text, which
        is rendered recursively.
        """
        marker = m.group(1)
        text = m.group(2)
        if len(marker) == 1:
            return 'emphasis', self.render(text, state)
        return 'strong', self.render(text, state)

    def parse_codespan(self, m, state):
        """Return a codespan token with whitespace normalised.

        The content is stripped and every run of spaces and newlines is
        collapsed into a single space.
        """
        code = re.sub(r'[ \n]+', ' ', m.group(2).strip())
        return 'codespan', code

    def parse_linebreak(self, m, state):
        """Return a linebreak token (a one-item tuple)."""
        return ('linebreak',)

    def parse_inline_html(self, m, state):
        """Pass raw HTML through and track whether an ``<a>`` element is open.

        An opening anchor tag sets ``state['_in_link']`` and a closing one
        clears it.  The tag name is compared case-insensitively and may be
        followed by ``>`` or any whitespace, so ``<A HREF=...>``, ``<a>``,
        a tag with a newline after the name, and ``</a >`` are all
        recognised; ``INLINE_HTML`` matches every one of these forms.
        """
        html = m.group(0)
        # The first four characters are enough to identify "<a" / "</a" plus
        # the character that terminates the tag name.
        head = html[:4].lower()
        if head.startswith('</a') and self._ends_tag_name(head[3:4]):
            state['_in_link'] = False
        elif head.startswith('<a') and self._ends_tag_name(head[2:3]):
            state['_in_link'] = True
        return 'inline_html', html

    @staticmethod
    def _ends_tag_name(char):
        """Return True if *char* ends a tag name (``>`` or whitespace).

        This keeps elements such as ``<abbr>`` from being mistaken for ``<a>``.
        """
        return char == '>' or char.isspace()

    def parse_text(self, text, state):
        """Wrap plain text in a text token."""
        return 'text', text

    def parse(self, s, state, rules=None):
        """Scan *s* and lazily convert each token through the renderer.

        :param s: inline source text.
        :param state: shared parser state mapping.
        :param rules: rule names to apply; defaults to ``self.rules``.
        :returns: a generator yielding, for every scanned token, the result
            of calling the renderer method selected by the token type with
            the remaining token items as arguments.
        """
        if rules is None:
            rules = self.rules

        tokens = (
            self.renderer._get_method(t[0])(*t[1:])
            for t in self._scan(s, state, rules)
        )
        return tokens

    def render(self, s, state, rules=None):
        """Parse *s* and return ``self.renderer.finalize`` of the tokens."""
        tokens = self.parse(s, state, rules)
        return self.renderer.finalize(tokens)

    def __call__(self, s, state):
        """Shortcut for :meth:`render` with the default rules."""
        return self.render(s, state)