Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mistune/inline_parser.py: 99%
110 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 06:10 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 06:10 +0000
1import re
2from .scanner import ScannerParser
3from .util import PUNCTUATION, ESCAPE_TEXT, escape_url, unikey
# Name of an HTML tag: one ASCII letter, then letters, digits or hyphens.
HTML_TAGNAME = r'[A-Za-z][A-Za-z0-9-]*'

# Zero or more HTML attributes. Each one is introduced by whitespace and may
# carry an unquoted, single-quoted or double-quoted value after ``=``.
HTML_ATTRIBUTES = (
    r'(?:\s+[A-Za-z_:][A-Za-z0-9_.:-]*'
    r'(?:\s*=\s*'
    r'(?:[^ "\'=<>`]+|\'[^\']*?\'|"[^\"]*?")'
    r')?)*'
)

# A backslash followed by one punctuation character; group 1 is that
# character, so ``ESCAPE_CHAR.sub(r'\1', s)`` removes the backslash.
ESCAPE_CHAR = re.compile(''.join((r'\\([', PUNCTUATION, r'])')))

# Text between the square brackets of a link: allows one level of nested
# brackets, backslash escapes and backtick-delimited spans.
LINK_TEXT = r'(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?'

# Label of a reference link: at most 1000 items, each either an ESCAPE_TEXT
# match or a single character other than a backslash or square bracket.
LINK_LABEL = ''.join((r'(?:[^\\\[\]]|', ESCAPE_TEXT, r'){0,1000}'))
class InlineParser(ScannerParser):
    """Parser for inline-level Markdown (links, emphasis, code spans, ...).

    Every name in ``RULE_NAMES`` has an upper-case class attribute holding
    the regular-expression source of the rule and a ``parse_<name>`` method
    that converts a match of that rule into a token tuple whose first item
    is the token type. The dispatch from rule name to pattern and method is
    not visible in this class; presumably ``ScannerParser`` performs it --
    confirm against the base class.

    The ``state`` mapping passed to the parse methods carries two keys used
    here: ``'_in_link'`` (true while the text of a link is being rendered,
    so that links are not nested) and ``'def_links'`` (reference-link
    definitions keyed by ``unikey``).
    """

    ESCAPE = ESCAPE_TEXT

    #: link or email syntax::
    #:
    #: <https://example.com>
    AUTO_LINK = (
        r'(?<!\\)(?:\\\\)*<([A-Za-z][A-Za-z0-9+.-]{1,31}:'
        r"[^ <>]*?|[A-Za-z0-9.!#$%&'*+/=?^_`{|}~-]+@[A-Za-z0-9]"
        r'(?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?'
        r'(?:\.[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?)*)>'
    )

    #: link or image syntax::
    #:
    #: [text](/link "title")
    #: ![alt](/src "title")
    STD_LINK = (
        r'!?\[(' + LINK_TEXT + r')\]\(\s*'

        r'(<(?:\\[<>]?|[^\s<>\\])*>|'
        r'(?:\\[()]?|\([^\s\x00-\x1f\\]*\)|[^\s\x00-\x1f()\\])*?)'

        r'(?:\s+('
        r'''"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)'''
        r'))?\s*\)'
    )

    #: Get link from references. References are defined in DEF_LINK in blocks.
    #: The syntax looks like::
    #:
    #: [an example][id]
    #:
    #: [id]: https://example.com "optional title"
    REF_LINK = (
        r'!?\[(' + LINK_TEXT + r')\]'
        r'\[(' + LINK_LABEL + r')\]'
    )

    #: Simple form of reference link::
    #:
    #: [an example]
    #:
    #: [an example]: https://example.com "optional title"
    REF_LINK2 = r'!?\[(' + LINK_LABEL + r')\]'

    #: emphasis and strong * or _::
    #:
    #: *emphasis* **strong**
    #: _emphasis_ __strong__
    ASTERISK_EMPHASIS = (
        r'(\*{1,2})(?=[^\s*])('
        r'(?:(?:(?<!\\)(?:\\\\)*\*)|[^*])+?'
        r')(?<!\\)\1'
    )
    UNDERSCORE_EMPHASIS = (
        r'\b(_{1,2})(?=[^\s_])([\s\S]*?'
        r'(?:' + ESCAPE_TEXT + r'|[^\s_]))\1'
        r'(?!_|[^\s' + PUNCTUATION + r'])\b'
    )

    #: codespan with `::
    #:
    #: `code`
    CODESPAN = (
        r'(?<!\\|`)(?:\\\\)*(`+)(?!`)([\s\S]+?)(?<!`)\1(?!`)'
    )

    #: linebreak leaves two spaces at the end of line
    LINEBREAK = r'(?:\\| {2,})\n(?!\s*$)'

    #: raw inline HTML: tags, comments, processing instructions,
    #: declarations and CDATA sections
    INLINE_HTML = (
        r'(?<!\\)<' + HTML_TAGNAME + HTML_ATTRIBUTES + r'\s*/?>|'  # open tag
        r'(?<!\\)</' + HTML_TAGNAME + r'\s*>|'  # close tag
        r'(?<!\\)<!--(?!>|->)(?:(?!--)[\s\S])+?(?<!-)-->|'  # comment
        r'(?<!\\)<\?[\s\S]+?\?>|'  # processing instruction
        r'(?<!\\)<![A-Z][\s\S]+?>|'  # doctype
        # The second "[" of "<![CDATA[" must be escaped; left bare it opens
        # the character class [\s\S] and "<![CDATAx]]>" is accepted as
        # CDATA. "*?" keeps the empty section "<![CDATA[]]>" matching.
        r'(?<!\\)<!\[CDATA\[[\s\S]*?\]\]>'  # cdata
    )

    RULE_NAMES = (
        'escape', 'inline_html', 'auto_link',
        'std_link', 'ref_link', 'ref_link2',
        'asterisk_emphasis', 'underscore_emphasis',
        'codespan', 'linebreak',
    )

    def __init__(self, renderer, hard_wrap=False):
        """Create the parser.

        :param renderer: object used by :meth:`parse` and :meth:`render`;
            it must provide ``_get_method(name)`` and ``finalize(tokens)``.
        :param hard_wrap: when true, replace the ``LINEBREAK`` pattern on
            this instance so that every new line becomes a line break.
        """
        super(InlineParser, self).__init__()
        if hard_wrap:
            #: every new line becomes <br>
            self.LINEBREAK = r' *\n(?!\s*$)'
        self.renderer = renderer

        # Rule set used to re-scan the source of a reference link that has
        # no definition; the two reference-link rules themselves are left
        # out (presumably so the re-scan cannot match them again).
        rules = list(self.RULE_NAMES)
        rules.remove('ref_link')
        rules.remove('ref_link2')
        self.ref_link_rules = rules

    def parse_escape(self, m, state):
        """Return a text token for the match minus its first character
        (the escaping backslash)."""
        text = m.group(0)[1:]
        return 'text', text

    def parse_auto_link(self, m, state):
        """Return a link token for ``<url>`` / ``<email>`` syntax.

        Inside another link the match is returned unchanged as text. An
        address containing ``@`` that does not already start with
        ``mailto:``, ``http://`` or ``https://`` gets a ``mailto:`` prefix
        in the link target; the visible text is never prefixed.
        """
        if state.get('_in_link'):
            return 'text', m.group(0)

        text = m.group(1)
        schemes = ('mailto:', 'http://', 'https://')
        if '@' in text and not text.lower().startswith(schemes):
            link = 'mailto:' + text
        else:
            link = text
        return 'link', escape_url(link), text

    def parse_std_link(self, m, state):
        """Return an image or link token for ``[text](link "title")``.

        Backslash escapes are removed from the link and title, the angle
        brackets of a ``<link>`` destination are stripped, and the title
        loses its surrounding delimiters.
        """
        line = m.group(0)
        text = m.group(1)
        link = ESCAPE_CHAR.sub(r'\1', m.group(2))
        if link.startswith('<') and link.endswith('>'):
            link = link[1:-1]

        title = m.group(3)
        if title:
            # title[1:-1] drops the quote or parenthesis pair around it.
            title = ESCAPE_CHAR.sub(r'\1', title[1:-1])

        if line[0] == '!':
            return 'image', escape_url(link), text, title

        return self.tokenize_link(line, link, text, title, state)

    def parse_ref_link(self, m, state):
        """Return an image or link token for a reference-style link.

        The definition is looked up in ``state['def_links']`` under
        ``unikey`` of the label (group 2), or of the text when the label
        is empty. When no definition exists, the matched source is
        scanned again with ``ref_link_rules`` and the resulting tokens are
        returned as a list.
        """
        line = m.group(0)
        text = m.group(1)
        key = unikey(m.group(2) or text)
        def_links = state.get('def_links')
        if not def_links or key not in def_links:
            return list(self._scan(line, state, self.ref_link_rules))

        link, title = def_links.get(key)
        link = ESCAPE_CHAR.sub(r'\1', link)
        if title:
            title = ESCAPE_CHAR.sub(r'\1', title)

        if line[0] == '!':
            return 'image', escape_url(link), text, title

        return self.tokenize_link(line, link, text, title, state)

    def parse_ref_link2(self, m, state):
        """Handle the short ``[label]`` reference form via
        :meth:`parse_ref_link`."""
        return self.parse_ref_link(m, state)

    def tokenize_link(self, line, link, text, title, state):
        """Build a link token, rendering ``text`` as inline Markdown.

        :param line: full matched source, returned as plain text when the
            parser is already inside a link.
        :param link: link target, passed through ``escape_url``.
        :param text: link text to render.
        :param title: optional title, or ``None``.
        :param state: parser state; ``'_in_link'`` is set while ``text`` is
            rendered.
        """
        if state.get('_in_link'):
            return 'text', line

        state['_in_link'] = True
        try:
            text = self.render(text, state)
        finally:
            # Always clear the flag, so an exception raised while rendering
            # the link text cannot leave the state stuck "inside a link".
            state['_in_link'] = False
        return 'link', escape_url(link), text, title

    def parse_asterisk_emphasis(self, m, state):
        """Handle ``*emphasis*`` and ``**strong**``."""
        return self.tokenize_emphasis(m, state)

    def parse_underscore_emphasis(self, m, state):
        """Handle ``_emphasis_`` and ``__strong__``."""
        return self.tokenize_emphasis(m, state)

    def tokenize_emphasis(self, m, state):
        """Return an ``emphasis`` token for a one-character marker and a
        ``strong`` token otherwise; the inner text is rendered first."""
        marker = m.group(1)
        text = m.group(2)
        if len(marker) == 1:
            return 'emphasis', self.render(text, state)
        return 'strong', self.render(text, state)

    def parse_codespan(self, m, state):
        """Return a codespan token; the code is stripped and every run of
        spaces and newlines inside it is collapsed to a single space."""
        code = re.sub(r'[ \n]+', ' ', m.group(2).strip())
        return 'codespan', code

    def parse_linebreak(self, m, state):
        """Return a one-item ``('linebreak',)`` token."""
        return 'linebreak',

    def parse_inline_html(self, m, state):
        """Return the matched raw HTML as an ``inline_html`` token.

        An opening ``<a ...>`` tag sets ``state['_in_link']`` and a closing
        ``</a>`` tag clears it, so Markdown links between the two are not
        turned into nested links.
        """
        html = m.group(0)
        if html.startswith('<a '):
            state['_in_link'] = True
        if html.startswith('</a>'):
            state['_in_link'] = False
        return 'inline_html', html

    def parse_text(self, text, state):
        """Return ``text`` unchanged as a text token."""
        return 'text', text

    def parse(self, s, state, rules=None):
        """Scan ``s`` and lazily render each token.

        :param s: source text.
        :param state: parser state mapping.
        :param rules: rule names to scan with; defaults to ``self.rules``.
        :returns: a generator that, for each scanned token ``t``, yields
            ``renderer._get_method(t[0])(*t[1:])``.
        """
        if rules is None:
            rules = self.rules

        tokens = (
            self.renderer._get_method(t[0])(*t[1:])
            for t in self._scan(s, state, rules)
        )
        return tokens

    def render(self, s, state, rules=None):
        """Parse ``s`` and return ``renderer.finalize`` of the tokens."""
        tokens = self.parse(s, state, rules)
        return self.renderer.finalize(tokens)

    def __call__(self, s, state):
        """Shortcut for ``self.render(s, state)``."""
        return self.render(s, state)