Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mistune/helpers.py: 99%
86 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1import re
2import string
3from .util import escape_url
# Zero-width guard plus any run of backslash *pairs*: the token that follows
# this fragment in a larger pattern is therefore not itself backslash-escaped.
PREVENT_BACKSLASH = r'(?<!\\)(?:\\\\)*'

# Character class matching exactly one ASCII punctuation character.
PUNCTUATION = r'[{}]'.format(re.escape(string.punctuation))

# Up to 500 items, each either a backslash escape or any character other
# than a backslash or a square bracket.
LINK_LABEL = r'(?:[^\\\[\]]|\\.){0,500}'

# Optional horizontal whitespace (with at most one newline) up to a '<'.
LINK_BRACKET_START = re.compile(r'[ \t]*\n?[ \t]*<')
# '<...>' destination; the content may not contain '<', '>', newline,
# backslash or NUL.
LINK_BRACKET_RE = re.compile(r'<([^<>\n\\\x00]*)>')
# Run of non-whitespace characters followed by whitespace or end of input.
LINK_HREF_BLOCK_RE = re.compile(r'[ \t]*\n?[ \t]*([^\s]+)(?:\s|$)')
# Lazily captured destination terminated by a space/tab/newline or by a
# ')' that is not backslash-escaped.
LINK_HREF_INLINE_RE = re.compile(
    r'[ \t]*\n?[ \t]*([^ \t\n]*?)(?:[ \t\n]|'
    r'(?:' + PREVENT_BACKSLASH + r'\)))'
)

# Mandatory leading whitespace, then a double- or single-quoted title in
# which escaped punctuation is allowed and NUL is not.
LINK_TITLE_RE = re.compile(
    r'[ \t\n]+('
    r'"(?:\\' + PUNCTUATION + r'|[^"\x00])*"|'  # "title"
    r"'(?:\\" + PUNCTUATION + r"|[^'\x00])*'"  # 'title'
    r')'
)
# Optional whitespace followed by the closing parenthesis.
PAREN_END_RE = re.compile(r'\s*\)')

# Pattern fragments (plain strings, not compiled) for an HTML tag name and
# for a sequence of attributes with optional unquoted/quoted values.
HTML_TAGNAME = r'[A-Za-z][A-Za-z0-9-]*'
HTML_ATTRIBUTES = (
    r'(?:\s+[A-Za-z_:][A-Za-z0-9_.:-]*'
    r'(?:\s*=\s*(?:[^ !"\'=<>`]+|\'[^\']*?\'|"[^\"]*?"))?)*'
)

# Tag names grouped as block-level HTML, going by the constant's name; the
# code that consumes this tuple is outside this section.
BLOCK_TAGS = (
    'address', 'article', 'aside', 'base', 'basefont', 'blockquote',
    'body', 'caption', 'center', 'col', 'colgroup', 'dd', 'details',
    'dialog', 'dir', 'div', 'dl', 'dt', 'fieldset', 'figcaption',
    'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3',
    'h4', 'h5', 'h6', 'head', 'header', 'hr', 'html', 'iframe',
    'legend', 'li', 'link', 'main', 'menu', 'menuitem', 'meta', 'nav',
    'noframes', 'ol', 'optgroup', 'option', 'p', 'param', 'section',
    'source', 'summary', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead',
    'title', 'tr', 'track', 'ul',
)
# Tag names listed separately from BLOCK_TAGS (presumably elements whose
# content is kept verbatim -- confirm against the consumer).
PRE_TAGS = ('pre', 'script', 'style', 'textarea')

# Private compiled helpers used by the parse_* functions of this module.
_INLINE_LINK_LABEL_RE = re.compile(LINK_LABEL + r'\]')
_INLINE_SQUARE_BRACKET_RE = re.compile(PREVENT_BACKSLASH + r'[\[\]]')
_ESCAPE_CHAR_RE = re.compile(r'\\(' + PUNCTUATION + r')')
def unescape_char(text):
    """Remove the escaping backslash in front of punctuation characters.

    Each backslash that is immediately followed by an ASCII punctuation
    character is dropped and the punctuation character is kept.  Backslashes
    in front of any other character are left as they are.

    :param text: string that may contain backslash escapes.
    :return: ``text`` with those escapes resolved.
    """
    return _ESCAPE_CHAR_RE.sub(lambda escaped: escaped.group(1), text)
def parse_link_text(src, pos):
    """Locate the ``]`` that closes a bracketed link text.

    Scanning starts at ``pos`` with one bracket already counted as open, so
    ``pos`` is presumably the index just after the opening ``[`` (confirm
    against callers).  Nested unescaped ``[``/``]`` pairs are balanced;
    brackets preceded by an odd number of backslashes are ignored.

    :param src: the full source string.
    :param pos: index at which scanning begins.
    :return: ``(text, end)`` where ``text`` is the slice between ``pos`` and
        the closing bracket and ``end`` is the index just past that bracket,
        or ``(None, None)`` when no balancing ``]`` is found.
    """
    depth = 1
    start_pos = pos

    while pos < len(src):
        m = _INLINE_SQUARE_BRACKET_RE.search(src, pos)
        if not m:
            break

        pos = m.end()
        # The match can start with a run of escaped backslashes (pairs)
        # consumed by the pattern prefix, so the matched text is not always
        # a single character.  The bracket itself is always the last
        # character; comparing the whole match to ']' would misread a
        # closing bracket that follows an escaped backslash as an opening
        # one.
        bracket = m.group(0)[-1]
        if bracket == ']':
            depth -= 1
            if depth == 0:
                # Exclude the closing bracket from the returned text.
                return src[start_pos:pos - 1], pos
        else:
            depth += 1

    return None, None
def parse_link_label(src, start_pos):
    """Match a link label that ends with ``]`` at ``start_pos``.

    :param src: the full source string.
    :param start_pos: index at which the label must begin.
    :return: ``(label, end)`` where ``label`` excludes the closing ``]`` and
        ``end`` is the index just past it, or ``(None, None)`` when the
        label pattern does not match at ``start_pos``.
    """
    matched = _INLINE_LINK_LABEL_RE.match(src, start_pos)
    if matched is None:
        return None, None

    end = matched.end()
    # The pattern includes the closing bracket; strip it from the label.
    return src[matched.start():end - 1], end
def parse_link_href(src, start_pos, block=False):
    """Extract a link destination beginning at ``start_pos``.

    Two forms are recognised: a destination wrapped in ``<...>``, and a bare
    destination matched with the block or inline pattern depending on
    ``block``.

    :param src: the full source string.
    :param start_pos: index at which the destination is expected.
    :param block: choose the block-level pattern instead of the inline one.
    :return: ``(href, end)`` or ``(None, None)`` when nothing matches.  For
        a bare destination ``end`` points at the terminating character
        (whitespace or ``)``), except in block mode when the destination
        runs to the end of the match, where ``end`` is the match end.
    """
    opener = LINK_BRACKET_START.match(src, start_pos)
    if opener:
        # Step back one character so the '<' itself is matched again.
        bracketed = LINK_BRACKET_RE.match(src, opener.end() - 1)
        if bracketed is None:
            return None, None
        return bracketed.group(1), bracketed.end()

    pattern = LINK_HREF_BLOCK_RE if block else LINK_HREF_INLINE_RE
    found = pattern.match(src, start_pos)
    if found is None:
        return None, None

    href = found.group(1)
    end_pos = found.end()
    if block and src[end_pos - 1] == href[-1]:
        # Nothing was consumed after the destination (it ended the input).
        return href, end_pos
    # Leave the terminating character for the caller to inspect.
    return href, end_pos - 1
def parse_link_title(src, start_pos, max_pos):
    """Parse a quoted link title located between ``start_pos`` and ``max_pos``.

    The title must be preceded by whitespace and wrapped in single or double
    quotes.  The quotes are removed and backslash-escaped punctuation is
    unescaped in the returned value.

    :param src: the full source string.
    :param start_pos: index at which matching begins.
    :param max_pos: index at which matching must stop.
    :return: ``(title, end)`` or ``(None, None)`` when no title matches.
    """
    found = LINK_TITLE_RE.match(src, start_pos, max_pos)
    if not found:
        return None, None

    quoted = found.group(1)
    return unescape_char(quoted[1:-1]), found.end()
def parse_link(src, pos):
    """Parse a link destination, an optional title and the closing ``)``.

    :param src: the full source string.
    :param pos: index at which the destination is expected.
    :return: ``(attrs, end)`` where ``attrs`` holds ``'url'`` (unescaped,
        then passed through ``escape_url``) and, when a non-empty title was
        found, ``'title'``; ``end`` is the index just past the closing
        parenthesis.  ``(None, None)`` when the destination or the closing
        parenthesis is missing.
    """
    href, href_end = parse_link_href(src, pos)
    if href is None:
        return None, None

    title, title_end = parse_link_title(src, href_end, len(src))
    # Resume after the title when there is one, otherwise after the href.
    closing = PAREN_END_RE.match(src, title_end or href_end)
    if closing is None:
        return None, None

    attrs = {'url': escape_url(unescape_char(href))}
    if title:
        attrs['title'] = title
    return attrs, closing.end()