Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/helpers.py: 99%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

88 statements  

1import re 

2import string 

3from typing import Any, Dict, Tuple, Union 

4 

5from .util import escape_url 

6 

# Regex fragment: an even-length run of backslashes (possibly empty) that is
# not itself preceded by a backslash. Prefixing it to a character means that
# character is matched only when it is not backslash-escaped.
PREVENT_BACKSLASH = r"(?<!\\)(?:\\\\)*"
# Regex character class containing every character of ``string.punctuation``.
PUNCTUATION = r"[" + re.escape(string.punctuation) + r"]"

# Regex fragment for the inside of a link label: at most 500 items, each being
# either a backslash followed by any character, or a single character that is
# not a backslash or a square bracket.
LINK_LABEL = r"(?:[^\\\[\]]|\\.){0,500}"

# Optional spaces/tabs, at most one newline, more spaces/tabs, then "<".
LINK_BRACKET_START = re.compile(r"[ \t]*\n?[ \t]*<")
# "<...>" destination; group 1 is the content between the angle brackets and
# may not contain "<", ">", a newline, a backslash or NUL.
LINK_BRACKET_RE = re.compile(r"<([^<>\n\\\x00]*)>")
# Block-mode destination: group 1 is a run of non-whitespace characters that
# must be followed by one whitespace character or the end of the input.
LINK_HREF_BLOCK_RE = re.compile(r"[ \t]*\n?[ \t]*([^\s]+)(?:\s|$)")
# Inline-mode destination: group 1 is matched lazily and ends at the first
# space/tab/newline or at the first ")" that is not backslash-escaped. Note
# that any backslash pairs directly before that ")" are consumed by the
# PREVENT_BACKSLASH part and therefore lie outside group 1.
LINK_HREF_INLINE_RE = re.compile(
    r"[ \t]*\n?[ \t]*([^ \t\n]*?)(?:[ \t\n]|"
    r"(?:" + PREVENT_BACKSLASH + r"\)))"
)

# Link title: mandatory leading whitespace, then a double- or single-quoted
# string. Group 1 includes the surrounding quotes. Inside the quotes a
# backslash-escaped punctuation character or any character other than the
# quote itself and NUL is accepted.
LINK_TITLE_RE = re.compile(
    r"[ \t\n]+("
    r'"(?:\\' + PUNCTUATION + r'|[^"\x00])*"|'  # "title"
    r"'(?:\\" + PUNCTUATION + r"|[^'\x00])*'"  # 'title'
    r")"
)
# Optional whitespace followed by the ")" that closes an inline link.
PAREN_END_RE = re.compile(r"\s*\)")

27 

# Regex fragment for a tag name: an ASCII letter followed by any number of
# ASCII letters, digits or hyphens.
HTML_TAGNAME = r"[A-Za-z][A-Za-z0-9-]*"
# Regex fragment for zero or more attributes. Each attribute is whitespace,
# a name (letter, "_" or ":" first; then letters, digits, "_", ".", ":" or
# "-"), and an optional "=value" part where the value is unquoted,
# single-quoted or double-quoted.
HTML_ATTRIBUTES = (
    r"(?:\s+[A-Za-z_:][A-Za-z0-9_.:-]*"
    r'(?:\s*=\s*(?:[^ !"\'=<>`]+|\'[^\']*?\'|"[^\"]*?"))?)*'
)

33 

# Lower-case HTML tag names, kept in alphabetical order and grouped here by
# initial letter. NOTE(review): presumably consumed by the block-level HTML
# parser elsewhere in the package -- the usage is not visible from this module.
BLOCK_TAGS = (
    "address", "article", "aside",
    "base", "basefont", "blockquote", "body",
    "caption", "center", "col", "colgroup",
    "dd", "details", "dialog", "dir", "div", "dl", "dt",
    "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html",
    "iframe",
    "legend", "li", "link",
    "main", "menu", "menuitem", "meta",
    "nav", "noframes",
    "ol", "optgroup", "option",
    "p", "param",
    "section", "source", "summary",
    "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "track",
    "ul",
)

# Tag names kept separate from BLOCK_TAGS. NOTE(review): presumably tags whose
# content is treated as raw text -- confirm against the caller.
PRE_TAGS = (
    "pre",
    "script",
    "style",
    "textarea",
)

100 

# A link label body (see LINK_LABEL) immediately followed by the closing "]".
_INLINE_LINK_LABEL_RE = re.compile(LINK_LABEL + r"\]")
# A "[" or "]" that is not backslash-escaped. Because PREVENT_BACKSLASH is part
# of the pattern, the matched text may start with an even run of backslashes;
# the bracket itself is always the last matched character.
_INLINE_SQUARE_BRACKET_RE = re.compile(PREVENT_BACKSLASH + r"[\[\]]")
# A backslash followed by one punctuation character; group 1 is that character.
_ESCAPE_CHAR_RE = re.compile(r"\\(" + PUNCTUATION + r")")

104 

105 

def unescape_char(text: str) -> str:
    """Remove the backslash from every backslash-escaped punctuation character.

    Each match of ``_ESCAPE_CHAR_RE`` (a backslash plus one punctuation
    character) is replaced by the punctuation character alone; everything
    else in *text* is returned unchanged.
    """
    unescaped = _ESCAPE_CHAR_RE.sub(r"\g<1>", text)
    return unescaped

108 

109 

def parse_link_text(src: str, pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Find the ``]`` that closes a link text starting at *pos*.

    Scanning starts with a nesting level of 1 (the opening ``[`` is assumed to
    sit just before *pos*). Every unescaped ``[`` raises the level and every
    unescaped ``]`` lowers it; the bracket that brings the level back to 0
    ends the text.

    :param src: the string being parsed.
    :param pos: index of the first character of the link text.
    :return: ``(text, end)`` where ``text`` is ``src`` from *pos* up to (not
        including) the closing ``]`` and ``end`` is the index just after that
        bracket, or ``(None, None)`` when no balancing ``]`` exists.
    """
    level = 1
    start_pos = pos

    while pos < len(src):
        m = _INLINE_SQUARE_BRACKET_RE.search(src, pos)
        if not m:
            break

        pos = m.end()
        # The pattern may consume an even run of backslashes (escaped
        # backslashes) in front of the bracket, so the whole match can be
        # e.g. ``\\]``. Only the last matched character tells which bracket
        # was found; comparing the whole match to "]" would miscount such a
        # closing bracket as an opening one.
        if src[pos - 1] == "]":
            level -= 1
            if level == 0:
                return src[start_pos : pos - 1], pos
        else:
            level += 1

    return None, None

134 

135 

def parse_link_label(src: str, start_pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Match a link label plus its closing ``]`` at *start_pos*.

    :return: ``(label, end)`` where ``label`` is the matched text without the
        trailing ``]`` and ``end`` is the index just after that bracket, or
        ``(None, None)`` when ``_INLINE_LINK_LABEL_RE`` does not match there.
    """
    matched = _INLINE_LINK_LABEL_RE.match(src, start_pos)
    if matched is None:
        return None, None

    end = matched.end()
    # The match always ends with "]"; slice it off to get the bare label.
    return src[matched.start() : end - 1], end

142 

143 

def parse_link_href(src: str, start_pos: int, block: bool = False) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Parse a link destination beginning at *start_pos*.

    Two forms are recognised:

    * ``<...>`` -- the text between the angle brackets is the destination.
    * a bare destination -- matched with ``LINK_HREF_BLOCK_RE`` when *block*
      is true, otherwise with ``LINK_HREF_INLINE_RE``.

    :param src: the string being parsed.
    :param start_pos: index at which the destination (or the whitespace
        leading up to it) starts.
    :param block: select the block-mode pattern instead of the inline one.
    :return: ``(href, end)`` or ``(None, None)`` when nothing matches. For the
        angle-bracket form ``end`` is just after ``>``. For a bare destination
        ``end`` is the index of the terminating character (whitespace or
        ``)``), or just after the destination when it runs to the end of the
        input in block mode.
    """
    m = LINK_BRACKET_START.match(src, start_pos)
    if m:
        # Step back onto the "<" so LINK_BRACKET_RE can match from it.
        start_pos = m.end() - 1
        m = LINK_BRACKET_RE.match(src, start_pos)
        if m:
            return m.group(1), m.end()
        return None, None

    if block:
        m = LINK_HREF_BLOCK_RE.match(src, start_pos)
    else:
        m = LINK_HREF_INLINE_RE.match(src, start_pos)

    if not m:
        return None, None

    end_pos = m.end()

    if block:
        href = m.group(1)
        # The last matched character equals the last href character only when
        # the pattern ended on "$" rather than on a whitespace character.
        if src[end_pos - 1] == href[-1]:
            return href, end_pos
        return href, end_pos - 1

    # Inline mode: the match ends with exactly one terminator character
    # (whitespace or an unescaped ")"). When the terminator is ")", escaped
    # backslash pairs directly before it are consumed outside group 1, so
    # slice the destination from the source instead of using group 1;
    # otherwise "a\\\\)" would lose its trailing backslashes.
    return src[m.start(1) : end_pos - 1], end_pos - 1

167 

168 

def parse_link_title(src: str, start_pos: int, max_pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Match a quoted link title between *start_pos* and *max_pos*.

    :return: ``(title, end)`` where ``title`` is the quoted text with its
        surrounding quotes removed and backslash escapes resolved via
        ``unescape_char``, and ``end`` is the index just after the closing
        quote; ``(None, None)`` when ``LINK_TITLE_RE`` does not match.
    """
    found = LINK_TITLE_RE.match(src, start_pos, max_pos)
    if found is None:
        return None, None

    quoted = found.group(1)
    # Drop the opening and closing quote characters before unescaping.
    return unescape_char(quoted[1:-1]), found.end()

176 

177 

def parse_link(src: str, pos: int) -> Union[Tuple[Dict[str, Any], int], Tuple[None, None]]:
    """Parse an inline link's destination, optional title and closing ``)``.

    :param src: the string being parsed.
    :param pos: index at which the destination starts.
    :return: ``(attrs, end)`` where ``attrs`` holds ``"url"`` (unescaped, then
        passed through ``escape_url``) and, when a non-empty title was found,
        ``"title"``; ``end`` is the index just after the closing ``)``.
        ``(None, None)`` when the destination or the closing ``)`` is missing.
    """
    href, href_pos = parse_link_href(src, pos)
    if href is None:
        return None, None
    assert href_pos is not None

    title, title_pos = parse_link_title(src, href_pos, len(src))

    # Continue after the title when there is one, otherwise after the href.
    resume_at = title_pos if title_pos else href_pos
    closing = PAREN_END_RE.match(src, resume_at)
    if closing is None:
        return None, None

    attrs = {"url": escape_url(unescape_char(href))}
    if title:
        attrs["title"] = title
    return attrs, closing.end()