Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mistune/scanner.py: 99%
87 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 06:10 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-03 06:10 +0000
1import re
class Scanner(re.Scanner):
    """``re.Scanner`` variant that yields tokens lazily and keeps unmatched text."""

    def iter(self, string, state, parse_text):
        """Generate tokens for ``string``.

        Each lexicon entry's action is expected to be a ``(name, method)``
        pair. For every match, ``method(match, state)`` is yielded; any
        non-empty text between two matches (or after the last one) is
        yielded as ``parse_text(text, state)``.
        """
        searcher = self.scanner.scanner(string)
        cursor = 0

        while True:
            found = searcher.search()
            if found is None:
                break

            # lastindex is 1-based; the lexicon list is 0-based.
            _rule_name, handler = self.lexicon[found.lastindex - 1][1]

            gap = string[cursor:found.start()]
            if gap:
                yield parse_text(gap, state)

            yield handler(found, state)
            cursor = found.end()

        # Whatever is left after the final match is plain text.
        tail = string[cursor:]
        if tail:
            yield parse_text(tail, state)
class ScannerParser(object):
    """Base class for parsers that tokenize text with a rule-driven scanner.

    Built-in rules are listed in ``RULE_NAMES``; for a built-in rule ``foo``
    the pattern is read from the attribute ``FOO`` and the handler from the
    method ``parse_foo``. Additional rules can be added at runtime through
    :meth:`register_rule`.
    """

    # Class used to build a scanner from a lexicon.
    scanner_cls = Scanner
    # Names of the rules implemented directly on the class.
    RULE_NAMES = tuple()

    def __init__(self):
        self.rules = list(self.RULE_NAMES)
        # name -> (pattern, callable(match, state)) for registered rules
        self.rule_methods = {}
        # '|'-joined rule names -> scanner instance built for those rules
        self._cached_sc = {}

    def register_rule(self, name, pattern, method):
        """Register an extra rule.

        :param name: rule name used to look the rule up later.
        :param pattern: pattern handed to the scanner for this rule.
        :param method: callable invoked as ``method(self, match, state)``.
        """
        def bound_method(m, state):
            return method(self, m, state)

        self.rule_methods[name] = (pattern, bound_method)
        # Cached scanners are keyed by rule names only, so a scanner built
        # before this call could still hold the previous pattern/method for
        # ``name``. Drop them so the next scan rebuilds with current rules.
        self._cached_sc.clear()

    def get_rule_pattern(self, name):
        """Return the pattern for ``name``.

        Raises ``KeyError`` when the rule is neither built-in nor registered.
        """
        if name not in self.RULE_NAMES:
            return self.rule_methods[name][0]
        return getattr(self, name.upper())

    def get_rule_method(self, name):
        """Return the handler for ``name``.

        Raises ``KeyError`` when the rule is neither built-in nor registered.
        """
        if name not in self.RULE_NAMES:
            return self.rule_methods[name][1]
        return getattr(self, 'parse_' + name)

    def parse_text(self, text, state):
        """Handle text not matched by any rule; subclasses must override."""
        raise NotImplementedError

    def _scan(self, s, state, rules):
        """Yield tokens for ``s`` using the given rule names.

        Handlers may return a list of tokens, which is flattened here;
        falsy non-list results are skipped.
        """
        sc = self._create_scanner(rules)
        for tok in sc.iter(s, state, self.parse_text):
            if isinstance(tok, list):
                for t in tok:
                    yield t
            elif tok:
                yield tok

    def _create_scanner(self, rules):
        """Return a scanner for ``rules``, building and caching it if needed."""
        sc_key = '|'.join(rules)
        sc = self._cached_sc.get(sc_key)
        if sc is not None:
            return sc

        lexicon = [
            (self.get_rule_pattern(n), (n, self.get_rule_method(n)))
            for n in rules
        ]
        sc = self.scanner_cls(lexicon)
        self._cached_sc[sc_key] = sc
        return sc
class Matcher(object):
    """Tokenizer that tries each rule at the current position in order.

    ``lexicon`` is a sequence of ``(compiled_pattern, (name, method))``
    entries. When no rule matches, scanning resumes at the next position
    found by :meth:`search_pos`.
    """

    # Positions where a paragraph may be interrupted.
    PARAGRAPH_END = re.compile(
        r'(?:\n{2,})|'
        r'(?:\n {0,3}#{1,6})|'  # ATX heading
        r'(?:\n {0,3}(?:`{3,}|~{3,}))|'  # fenced code
        r'(?:\n {0,3}>)|'  # blockquote
        r'(?:\n {0,3}(?:[\*\+-]|1[.)]))|'  # list
        r'(?:\n {0,3}<)'  # block html
    )

    def __init__(self, lexicon):
        self.lexicon = lexicon

    def search_pos(self, string, pos):
        """Return the next position at which rules should be retried.

        Returns ``None`` when ``PARAGRAPH_END`` is not found at or after
        ``pos``. For a run of newlines the position just past the run is
        returned; otherwise the position right after the leading newline.
        """
        hit = self.PARAGRAPH_END.search(string, pos)
        if hit is None:
            return None

        # Every alternative starts with a newline, so the matched text is
        # never empty; it is "only newlines" when nothing else remains.
        if hit.group(0).strip('\n') == '':
            return hit.end()
        return hit.start() + 1

    def iter(self, string, state, parse_text):
        """Generate tokens for ``string``.

        Rules whose name ends with ``_start`` are called as
        ``method(match, state, string)`` and must return a sequence whose
        first item is the token and second item the end position. All other
        rules are called as ``method(match, state)``. Text not consumed by
        any rule is yielded as ``parse_text(text, state)``.
        """
        total = len(string)
        cursor = 0    # where rules are tried next
        consumed = 0  # end of the last emitted token

        while cursor < total:
            for pattern, (rule_name, handler) in self.lexicon:
                hit = pattern.match(string, cursor)
                if hit is None:
                    continue

                begin, finish = hit.span()
                # Flush text skipped since the previous token.
                if begin > consumed:
                    yield parse_text(string[consumed:begin], state)

                if rule_name.endswith('_start'):
                    result = handler(hit, state, string)
                    yield result[0]
                    finish = result[1]
                else:
                    yield handler(hit, state)

                cursor = consumed = finish
                break
            else:
                # No rule matched here; jump to the next candidate position.
                upcoming = self.search_pos(string, cursor)
                if upcoming is None:
                    break
                cursor = upcoming

        if consumed < total:
            yield parse_text(string[consumed:], state)