Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/helpers.py: 99%

1import re

2import string

3from typing import Any, Dict, Tuple, Union

5from .util import escape_url

# Matches an even-length (possibly empty) run of backslashes that is not
# itself preceded by a backslash.  Prepending it to a pattern therefore
# requires the character that follows to be unescaped.
PREVENT_BACKSLASH = r"(?<!\\)(?:\\\\)*"

# Character class containing every character of ``string.punctuation``.
PUNCTUATION = r"[" + re.escape(string.punctuation) + r"]"

# Up to 500 items, each either a backslash escape (backslash + any character)
# or a single character that is not a backslash or square bracket.
LINK_LABEL = r"(?:[^\\\[\]]|\\.){0,500}"

# Optional spaces/tabs, at most one newline, optional spaces/tabs, then "<".
LINK_BRACKET_START = re.compile(r"[ \t]*\n?[ \t]*<")

# "<...>" whose content (group 1) has no "<", ">", newline, backslash or NUL.
LINK_BRACKET_RE = re.compile(r"<([^<>\n\\\x00]*)>")

# Leading spaces/tabs (at most one newline), a run of non-whitespace
# characters (group 1), then one whitespace character or end of input.
LINK_HREF_BLOCK_RE = re.compile(r"[ \t]*\n?[ \t]*([^\s]+)(?:\s|$)")

# Leading spaces/tabs (at most one newline), then the shortest run of
# characters other than space/tab/newline (group 1) that is followed by a
# space, tab, newline, or an unescaped ")".
LINK_HREF_INLINE_RE = re.compile(
    r"[ \t]*\n?[ \t]*([^ \t\n]*?)(?:[ \t\n]|"
    r"(?:" + PREVENT_BACKSLASH + r"\)))"
)

# One or more spaces/tabs/newlines followed by a quoted title.  Group 1 is
# the title including its surrounding quotes; backslash-escaped punctuation
# is accepted inside, NUL is not.
LINK_TITLE_RE = re.compile(
    r"[ \t\n]+("
    r'"(?:\\' + PUNCTUATION + r'|[^"\x00])*"|'  # "title"
    r"'(?:\\" + PUNCTUATION + r"|[^'\x00])*'"  # 'title'
    r")"
)

# Optional whitespace followed by ")".
PAREN_END_RE = re.compile(r"\s*\)")

# An ASCII letter followed by ASCII letters, digits or hyphens.
HTML_TAGNAME = r"[A-Za-z][A-Za-z0-9-]*"

# Zero or more attributes, each preceded by whitespace: a name optionally
# followed by "=" and an unquoted, single-quoted or double-quoted value.
HTML_ATTRIBUTES = (
    r"(?:\s+[A-Za-z_:][A-Za-z0-9_.:-]*"
    r'(?:\s*=\s*(?:[^ !"\'=<>`]+|\'[^\']*?\'|"[^\"]*?"))?)*'
)

# HTML element names, lower-case and in alphabetical order.
# NOTE(review): presumably the set of tags the parser treats as block-level
# HTML; the consumers are not visible from this module -- confirm.
BLOCK_TAGS = (
    "address",
    "article",
    "aside",
    "base",
    "basefont",
    "blockquote",
    "body",
    "caption",
    "center",
    "col",
    "colgroup",
    "dd",
    "details",
    "dialog",
    "dir",
    "div",
    "dl",
    "dt",
    "fieldset",
    "figcaption",
    "figure",
    "footer",
    "form",
    "frame",
    "frameset",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "head",
    "header",
    "hr",
    "html",
    "iframe",
    "legend",
    "li",
    "link",
    "main",
    "menu",
    "menuitem",
    "meta",
    "nav",
    "noframes",
    "ol",
    "optgroup",
    "option",
    "p",
    "param",
    "section",
    "source",
    "summary",
    "table",
    "tbody",
    "td",
    "tfoot",
    "th",
    "thead",
    "title",
    "tr",
    "track",
    "ul",
)

# NOTE(review): presumably tags whose content is kept verbatim -- confirm
# against the code that consumes this tuple.
PRE_TAGS = ("pre", "script", "style", "textarea")

100

# A link label body immediately followed by its closing "]".
_INLINE_LINK_LABEL_RE = re.compile(LINK_LABEL + r"\]")
# An unescaped "[" or "]".  The match text may start with escaped-backslash
# pairs consumed by PREVENT_BACKSLASH; the bracket is always its last character.
_INLINE_SQUARE_BRACKET_RE = re.compile(PREVENT_BACKSLASH + r"[\[\]]")
# A backslash followed by one punctuation character (captured as group 1).
_ESCAPE_CHAR_RE = re.compile(r"\\(" + PUNCTUATION + r")")

104

105

def unescape_char(text: str) -> str:
    """Remove the backslash from each backslash-escaped punctuation character.

    Every occurrence of a backslash followed by a character matched by
    ``PUNCTUATION`` is replaced by that character alone; all other text,
    including backslashes before non-punctuation, is returned unchanged.
    """
    return _ESCAPE_CHAR_RE.sub(r"\1", text)

108

109

def parse_link_text(src: str, pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Scan forward from ``pos`` for the "]" that balances the link text.

    Unescaped "[" and "]" are counted as nesting; escaped brackets are
    skipped.  The nesting depth starts at 1, so the scan assumes the caller
    has already consumed the opening "[".

    :param src: text being parsed.
    :param pos: index at which the link text starts.
    :return: ``(text, end)`` where ``text`` is ``src`` from ``pos`` up to (not
        including) the balancing "]" and ``end`` is the index just past that
        bracket; ``(None, None)`` if no balancing "]" is found.
    """
    start_pos = pos
    level = 1

    while pos < len(src):
        m = _INLINE_SQUARE_BRACKET_RE.search(src, pos)
        if not m:
            break

        pos = m.end()
        # The match can begin with escaped-backslash pairs (e.g. ``\\]``), so
        # classify the bracket by the final character only.  Comparing the
        # whole match against "]" would count such a closing bracket as an
        # opening one.
        if m.group(0)[-1] == "]":
            level -= 1
            if level == 0:
                return src[start_pos : pos - 1], pos
        else:
            level += 1

    return None, None

134

135

def parse_link_label(src: str, start_pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Match a link label and its closing "]" starting at ``start_pos``.

    :param src: text being parsed.
    :param start_pos: index at which the label body starts.
    :return: ``(label, end)`` where ``label`` excludes the closing "]" and
        ``end`` is the index just past it; ``(None, None)`` when the text at
        ``start_pos`` does not match ``LINK_LABEL`` followed by "]".
    """
    m = _INLINE_LINK_LABEL_RE.match(src, start_pos)
    if not m:
        return None, None
    # Drop the trailing "]" that is part of the match.
    return m.group(0)[:-1], m.end()

142

143

def parse_link_href(src: str, start_pos: int, block: bool = False) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Parse a link destination starting at ``start_pos``.

    Two forms are recognised: an angle-bracketed destination ``<...>`` and a
    bare destination, matched with ``LINK_HREF_BLOCK_RE`` when ``block`` is
    true and with ``LINK_HREF_INLINE_RE`` otherwise.

    :param src: text being parsed.
    :param start_pos: index at which to start matching.
    :param block: select the block-level pattern for bare destinations.
    :return: ``(href, end)``.  For the bracketed form ``end`` is just past
        ">".  For a bare destination ``end`` is the index of the terminating
        character (whitespace or ")"), which is left unconsumed, or the end
        of the match when a block destination runs to the end of input.
        ``(None, None)`` when nothing matches.
    """
    m = LINK_BRACKET_START.match(src, start_pos)
    if m:
        # Step back onto the "<" so the bracket pattern can match from it.
        start_pos = m.end() - 1
        m = LINK_BRACKET_RE.match(src, start_pos)
        if m:
            return m.group(1), m.end()
        return None, None

    pattern = LINK_HREF_BLOCK_RE if block else LINK_HREF_INLINE_RE
    m = pattern.match(src, start_pos)
    if not m:
        return None, None

    end_pos = m.end()
    href = m.group(1)
    # NOTE(review): in inline mode, escaped-backslash pairs directly before
    # the closing ")" are consumed by PREVENT_BACKSLASH outside group 1 and
    # are therefore not part of ``href`` -- confirm whether that is intended.

    # In block mode the pattern may have ended at end of input instead of on
    # a whitespace character; then the last matched character belongs to the
    # href and there is no terminator to step back over.
    if block and src[end_pos - 1] == href[-1]:
        return href, end_pos
    return href, end_pos - 1

167

168

def parse_link_title(src: str, start_pos: int, max_pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Parse a quoted link title starting at ``start_pos``.

    :param src: text being parsed.
    :param start_pos: index at which to start matching; the title must be
        preceded by at least one space, tab or newline.
    :param max_pos: passed to the regex as ``endpos``, so the match cannot
        extend past this index.
    :return: ``(title, end)`` where ``title`` has its surrounding quotes
        removed and escaped punctuation unescaped, and ``end`` is the index
        just past the closing quote; ``(None, None)`` when no title matches.
    """
    m = LINK_TITLE_RE.match(src, start_pos, max_pos)
    if not m:
        return None, None
    quoted = m.group(1)
    # Strip the opening and closing quote characters before unescaping.
    return unescape_char(quoted[1:-1]), m.end()

176

177

def parse_link(src: str, pos: int) -> Union[Tuple[Dict[str, Any], int], Tuple[None, None]]:
    """Parse the destination, optional title and closing ")" of an inline link.

    :param src: text being parsed.
    :param pos: index at which the link destination starts.
    :return: ``(attrs, end)`` where ``attrs`` holds ``"url"`` (unescaped, then
        passed through ``escape_url``) and, when a non-empty title is present,
        ``"title"``; ``end`` is the index just past the closing ")".
        ``(None, None)`` when the destination or the closing ")" is missing.
    """
    href, href_pos = parse_link_href(src, pos)
    if href is None:
        return None, None
    assert href_pos is not None  # narrows the type for static checkers

    title, title_pos = parse_link_title(src, href_pos, len(src))
    # Continue after the title when one was found, otherwise after the href.
    next_pos = href_pos if title_pos is None else title_pos
    m = PAREN_END_RE.match(src, next_pos)
    if not m:
        return None, None

    attrs = {"url": escape_url(unescape_char(href))}
    if title:
        attrs["title"] = title
    return attrs, m.end()