Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mistune/helpers.py: 99%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import re
2import string
3from typing import Any, Dict, Tuple, Union
5from .util import escape_url
# Pattern fragment placed in front of a delimiter: the position must not be
# preceded by a backslash, and any run of backslash *pairs* is then consumed.
# The delimiter that follows is therefore only matched when it is preceded by
# an even number of backslashes (i.e. it is not itself escaped). Note that the
# consumed backslash pairs become part of the overall match.
PREVENT_BACKSLASH = r"(?<!\\)(?:\\\\)*"

# Character class matching any single character from ``string.punctuation``.
PUNCTUATION = r"[" + re.escape(string.punctuation) + r"]"

# Up to 500 items, each either one character that is not a backslash or a
# square bracket, or a backslash followed by one character.
LINK_LABEL = r"(?:[^\\\[\]]|\\.){0,500}"

# Optional spaces/tabs, at most one newline, optional spaces/tabs, then "<".
LINK_BRACKET_START = re.compile(r"[ \t]*\n?[ \t]*<")
# "<...>" whose content (group 1) has no "<", ">", newline, backslash or NUL.
LINK_BRACKET_RE = re.compile(r"<([^<>\n\\\x00]*)>")
# Leading blanks (at most one newline), then a run of non-whitespace
# characters (group 1) terminated by one whitespace character or end of input.
LINK_HREF_BLOCK_RE = re.compile(r"[ \t]*\n?[ \t]*([^\s]+)(?:\s|$)")
# Leading blanks (at most one newline), then the shortest run of characters
# other than space/tab/newline (group 1) that is followed by either one
# space/tab/newline or a ")" preceded by an even number of backslashes.
LINK_HREF_INLINE_RE = re.compile(
    r"[ \t]*\n?[ \t]*([^ \t\n]*?)(?:[ \t\n]|"
    r"(?:" + PREVENT_BACKSLASH + r"\)))"
)

# One or more spaces/tabs/newlines followed by a double- or single-quoted
# title. Group 1 is the title *including* its surrounding quotes; NUL is not
# allowed inside, and a backslash followed by punctuation is accepted as a unit.
LINK_TITLE_RE = re.compile(
    r"[ \t\n]+("
    r'"(?:\\' + PUNCTUATION + r'|[^"\x00])*"|'  # "title"
    r"'(?:\\" + PUNCTUATION + r"|[^'\x00])*'"  # 'title'
    r")"
)
# Optional whitespace followed by a closing parenthesis.
PAREN_END_RE = re.compile(r"\s*\)")
# Pattern fragment for a tag name: an ASCII letter followed by any number of
# ASCII letters, digits or hyphens.
HTML_TAGNAME = r"[A-Za-z][A-Za-z0-9-]*"
# Pattern fragment for zero or more attributes. Each attribute is whitespace,
# a name (letter, "_" or ":" first; then letters, digits, "_", ".", ":", "-"),
# and optionally "=" plus a value that is either unquoted (no space, !, ", ',
# =, <, > or backtick), single-quoted, or double-quoted.
# NOTE(review): both fragments are uncompiled strings, presumably composed
# into larger patterns by other modules -- not visible from this file.
HTML_ATTRIBUTES = (
    r"(?:\s+[A-Za-z_:][A-Za-z0-9_.:-]*"
    r'(?:\s*=\s*(?:[^ !"\'=<>`]+|\'[^\']*?\'|"[^\"]*?"))?)*'
)
# Lower-case HTML tag names, kept in alphabetical order.
# NOTE(review): presumably the tags that open a block-level HTML element
# (cf. the CommonMark HTML-block tag list); the consumer is not in this file.
BLOCK_TAGS = (
    "address",
    "article",
    "aside",
    "base",
    "basefont",
    "blockquote",
    "body",
    "caption",
    "center",
    "col",
    "colgroup",
    "dd",
    "details",
    "dialog",
    "dir",
    "div",
    "dl",
    "dt",
    "fieldset",
    "figcaption",
    "figure",
    "footer",
    "form",
    "frame",
    "frameset",
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "head",
    "header",
    "hr",
    "html",
    "iframe",
    "legend",
    "li",
    "link",
    "main",
    "menu",
    "menuitem",
    "meta",
    "nav",
    "noframes",
    "ol",
    "optgroup",
    "option",
    "p",
    "param",
    "section",
    "source",
    "summary",
    "table",
    "tbody",
    "td",
    "tfoot",
    "th",
    "thead",
    "title",
    "tr",
    "track",
    "ul",
)
# NOTE(review): presumably tags whose content is kept verbatim (raw text);
# the consumer is not visible in this file.
PRE_TAGS = ("pre", "script", "style", "textarea")
# A link label body (see LINK_LABEL) immediately followed by its closing "]".
_INLINE_LINK_LABEL_RE = re.compile(LINK_LABEL + r"\]")
# A "[" or "]" preceded by an even number of backslashes. Because of
# PREVENT_BACKSLASH the match text may start with those backslash pairs; the
# bracket itself is always the last matched character.
_INLINE_SQUARE_BRACKET_RE = re.compile(PREVENT_BACKSLASH + r"[\[\]]")
# A backslash followed by one punctuation character (captured as group 1).
_ESCAPE_CHAR_RE = re.compile(r"\\(" + PUNCTUATION + r")")
def unescape_char(text: str) -> str:
    """Return *text* with backslash escapes of punctuation removed.

    Every backslash that is immediately followed by a punctuation character
    is dropped and the punctuation character itself is kept. Backslashes in
    front of any other character are left untouched.
    """

    def _keep_escaped(match: "re.Match[str]") -> str:
        # Group 1 is the punctuation character that followed the backslash.
        return match.group(1)

    return _ESCAPE_CHAR_RE.sub(_keep_escaped, text)
def parse_link_text(src: str, pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Find the link text that ends at the bracket balancing an opening "[".

    Scanning starts at ``pos``, which is expected to be the position right
    after an already-consumed "[" (the nesting level starts at 1). Unescaped
    "[" raise the nesting level and unescaped "]" lower it; the text ends at
    the "]" that brings the level back to zero.

    :param src: the source string being parsed.
    :param pos: index in ``src`` at which to start scanning.
    :returns: ``(text, end)`` where ``text`` is ``src`` from ``pos`` up to but
        excluding the balancing "]" and ``end`` is the index just past that
        "]"; ``(None, None)`` when no balancing "]" exists.
    """
    level = 1
    start_pos = pos

    while pos < len(src):
        m = _INLINE_SQUARE_BRACKET_RE.search(src, pos)
        if not m:
            break

        pos = m.end()
        # The pattern also consumes any backslash pairs directly in front of
        # the bracket, so the matched text can be longer than one character.
        # Only its last character is the bracket; comparing the whole match
        # against "]" would misread such a closing bracket as an opening one.
        if m.group(0)[-1] == "]":
            level -= 1
            if level == 0:
                return src[start_pos : pos - 1], pos
        else:
            level += 1

    return None, None
def parse_link_label(src: str, start_pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Read a link label that starts at ``start_pos`` and ends with "]".

    :param src: the source string being parsed.
    :param start_pos: index of the first character of the label.
    :returns: ``(label, end)`` with the label text (closing "]" excluded) and
        the index just past the "]"; ``(None, None)`` when the text at
        ``start_pos`` is not a valid label.
    """
    matched = _INLINE_LINK_LABEL_RE.match(src, start_pos)
    if matched is None:
        return None, None

    whole = matched.group(0)
    # The pattern always ends with the closing bracket; drop it from the label.
    return whole[: len(whole) - 1], matched.end()
def parse_link_href(src: str, start_pos: int, block: bool = False) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Parse a link destination starting at ``start_pos``.

    Two forms are recognised: a destination wrapped in angle brackets
    (``<...>``), and a bare destination. A bare destination is matched with
    ``LINK_HREF_BLOCK_RE`` when ``block`` is true and with
    ``LINK_HREF_INLINE_RE`` otherwise.

    :param src: the source string being parsed.
    :param start_pos: index at which the destination (or blanks before it)
        begins.
    :param block: select the block-level pattern instead of the inline one.
    :returns: ``(href, end)`` on success, ``(None, None)`` otherwise. For the
        angle-bracket form ``end`` is just past ">". For a bare destination
        ``end`` is the index of the terminating character (whitespace or
        ")"), or the end of the match when a block destination ran to the end
        of the input.
    """
    m = LINK_BRACKET_START.match(src, start_pos)
    if m:
        # Step back onto the "<" so LINK_BRACKET_RE can match from it.
        start_pos = m.end() - 1
        m = LINK_BRACKET_RE.match(src, start_pos)
        if m:
            return m.group(1), m.end()
        return None, None

    if block:
        m = LINK_HREF_BLOCK_RE.match(src, start_pos)
    else:
        m = LINK_HREF_INLINE_RE.match(src, start_pos)

    if not m:
        return None, None

    end_pos = m.end()

    if block:
        href = m.group(1)
        # The pattern ends with "whitespace or end of input". If the last
        # matched character is the href's own last character, no terminator
        # was consumed and there is nothing to give back.
        if src[end_pos - 1] == href[-1]:
            return href, end_pos
        return href, end_pos - 1

    # Inline: exactly one terminator (blank or ")") was consumed; give it
    # back to the caller. When the terminator is ")", the pattern matches any
    # backslash pairs in front of it *outside* group 1, so take the href from
    # the source text instead of the group to keep those backslashes.
    return src[m.start(1) : end_pos - 1], end_pos - 1
def parse_link_title(src: str, start_pos: int, max_pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Parse a quoted link title located between ``start_pos`` and ``max_pos``.

    The title must be preceded by at least one space, tab or newline and be
    wrapped in single or double quotes.

    :param src: the source string being parsed.
    :param start_pos: index at which the blanks before the title begin.
    :param max_pos: index limiting how far the pattern may look.
    :returns: ``(title, end)`` with the quotes stripped and backslash-escaped
        punctuation unescaped, ``end`` being just past the closing quote;
        ``(None, None)`` when no title is present.
    """
    found = LINK_TITLE_RE.match(src, start_pos, max_pos)
    if found is None:
        return None, None

    quoted = found.group(1)
    # Strip the surrounding quote characters before unescaping.
    inner = quoted[1:-1]
    return unescape_char(inner), found.end()
def parse_link(src: str, pos: int) -> Union[Tuple[Dict[str, Any], int], Tuple[None, None]]:
    """Parse the ``destination "title")`` tail of an inline link.

    :param src: the source string being parsed.
    :param pos: index at which the link destination begins.
    :returns: ``(attrs, end)`` where ``attrs`` holds ``"url"`` (unescaped,
        then passed through ``escape_url``) and, when a non-empty title was
        given, ``"title"``; ``end`` is just past the closing ")". Returns
        ``(None, None)`` if the destination is missing or the closing ")"
        is not found.
    """
    href, href_pos = parse_link_href(src, pos)
    if href is None:
        return None, None
    assert href_pos is not None

    title, title_pos = parse_link_title(src, href_pos, len(src))
    # Continue after the title when there is one, otherwise after the href.
    resume_at = title_pos if title_pos else href_pos
    closing = PAREN_END_RE.match(src, resume_at)
    if closing is None:
        return None, None

    attrs = {"url": escape_url(unescape_char(href))}
    if title:
        attrs["title"] = title
    return attrs, closing.end()